Analysis ICU Plugin, closes #151
This commit is contained in:
parent
dfd002bf98
commit
11e4ad9bd6
|
@ -21,6 +21,7 @@
|
|||
<entry name="?*.yml" />
|
||||
<entry name="?*.txt" />
|
||||
<entry name="?*.pdf" />
|
||||
<entry name="?*.nrm" />
|
||||
</wildcardResourcePatterns>
|
||||
<annotationProcessing enabled="false" useClasspath="true" />
|
||||
</component>
|
||||
|
|
|
@ -69,6 +69,7 @@
|
|||
<w>throwable</w>
|
||||
<w>tika</w>
|
||||
<w>timestamp</w>
|
||||
<w>tokenizers</w>
|
||||
<w>translog</w>
|
||||
<w>traslog</w>
|
||||
<w>trie</w>
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
<module fileurl="file://$PROJECT_DIR$/.idea/modules//benchmark-micro.iml" filepath="$PROJECT_DIR$/.idea/modules//benchmark-micro.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/modules/elasticsearch.iml" filepath="$PROJECT_DIR$/.idea/modules/elasticsearch.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/modules/elasticsearch-root.iml" filepath="$PROJECT_DIR$/.idea/modules/elasticsearch-root.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/modules/plugin-analysis-icu.iml" filepath="$PROJECT_DIR$/.idea/modules/plugin-analysis-icu.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/modules//plugin-client-groovy.iml" filepath="$PROJECT_DIR$/.idea/modules//plugin-client-groovy.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/modules//plugin-discovery-jgroups.iml" filepath="$PROJECT_DIR$/.idea/modules//plugin-discovery-jgroups.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/modules//plugin-mapper-attachments.iml" filepath="$PROJECT_DIR$/.idea/modules//plugin-mapper-attachments.iml" />
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
<orderEntry type="module" module-name="plugin-discovery-jgroups" />
|
||||
<orderEntry type="module" module-name="plugin-mapper-attachments" />
|
||||
<orderEntry type="module" module-name="plugin-transport-memcached" />
|
||||
<orderEntry type="module" module-name="plugin-analysis-icu" />
|
||||
<orderEntry type="module" module-name="test-integration" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/../../plugins/analysis/icu/build/classes/main" />
|
||||
<output-test url="file://$MODULE_DIR$/../../plugins/analysis/icu/build/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$/../../plugins/analysis/icu">
|
||||
<sourceFolder url="file://$MODULE_DIR$/../../plugins/analysis/icu/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/../../plugins/analysis/icu/src/test/java" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/../../plugins/analysis/icu/build" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="module" module-name="elasticsearch" />
|
||||
<orderEntry type="module-library">
|
||||
<library name="icu4j">
|
||||
<CLASSES>
|
||||
<root url="jar://$GRADLE_REPOSITORY$/com.ibm.icu/icu4j/jars/icu4j-4.4.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES>
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/charset/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/localespi/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/translit/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/langdata/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/collate/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/charset/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/collate/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/translit/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/core/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/core/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/regiondata/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/packaging/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/framework/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/currdata/src" />
|
||||
<root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/localespi/src" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</orderEntry>
|
||||
<orderEntry type="module" module-name="test-testng" />
|
||||
<orderEntry type="library" name="testng" level="project" />
|
||||
<orderEntry type="library" name="hamcrest" level="project" />
|
||||
<orderEntry type="module-library">
|
||||
<library name="lucene-collation">
|
||||
<CLASSES>
|
||||
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-collation/jars/lucene-collation-3.0.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</orderEntry>
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -21,9 +21,13 @@ package org.elasticsearch.env;
|
|||
|
||||
import org.elasticsearch.cluster.ClusterName;
|
||||
import org.elasticsearch.util.Classes;
|
||||
import org.elasticsearch.util.io.Streams;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
|
@ -122,6 +126,10 @@ public class Environment {
|
|||
return logsFile;
|
||||
}
|
||||
|
||||
public String resolveConfigAndLoadToString(String path) throws FailedToResolveConfigException, IOException {
|
||||
return Streams.copyToString(new InputStreamReader(resolveConfig(path).openStream(), "UTF-8"));
|
||||
}
|
||||
|
||||
public URL resolveConfig(String path) throws FailedToResolveConfigException {
|
||||
// first, try it as a path on the file system
|
||||
File f1 = new File(path);
|
||||
|
|
|
@ -19,23 +19,46 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.util.gcommon.collect.Lists;
|
||||
import org.elasticsearch.util.guice.inject.AbstractModule;
|
||||
import org.elasticsearch.util.guice.inject.Scopes;
|
||||
import org.elasticsearch.util.guice.inject.assistedinject.FactoryProvider;
|
||||
import org.elasticsearch.util.guice.inject.multibindings.MapBinder;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author kimchy (Shay Banon)
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class AnalysisModule extends AbstractModule {
|
||||
|
||||
public static interface AnalysisBinderProcessor {
|
||||
void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings);
|
||||
|
||||
void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings);
|
||||
|
||||
void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings);
|
||||
}
|
||||
|
||||
private final Settings settings;
|
||||
|
||||
private final List<AnalysisBinderProcessor> processors = Lists.newArrayList();
|
||||
|
||||
/**
 * Creates the module for the given settings and registers the built-in
 * binder processors: the default one always, the extended one only if it
 * can be instantiated.
 *
 * @param settings the settings this module is configured from
 */
public AnalysisModule(Settings settings) {
    this.settings = settings;
    this.processors.add(new DefaultProcessor());
    try {
        AnalysisBinderProcessor extended = new ExtendedProcessor();
        this.processors.add(extended);
    } catch (Throwable ignored) {
        // Deliberately ignored: a failure here is treated as
        // "no extended analysis components available".
    }
}
|
||||
|
||||
public AnalysisModule addProcessor(AnalysisBinderProcessor processor) {
|
||||
processors.add(processor);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override protected void configure() {
|
||||
|
@ -53,52 +76,9 @@ public class AnalysisModule extends AbstractModule {
|
|||
}
|
||||
tokenFilterBinder.addBinding(tokenFilterName).toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, type)).in(Scopes.SINGLETON);
|
||||
}
|
||||
// add defaults
|
||||
if (!tokenFiltersSettings.containsKey("stop")) {
|
||||
tokenFilterBinder.addBinding("stop").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StopTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("asciifolding")) {
|
||||
tokenFilterBinder.addBinding("asciifolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ASCIIFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("length")) {
|
||||
tokenFilterBinder.addBinding("length").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LengthTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("lowercase")) {
|
||||
tokenFilterBinder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LowerCaseTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("porterStem")) {
|
||||
tokenFilterBinder.addBinding("porterStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("standard")) {
|
||||
tokenFilterBinder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StandardTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("nGram")) {
|
||||
tokenFilterBinder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("edgeNGram")) {
|
||||
tokenFilterBinder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("shingle")) {
|
||||
tokenFilterBinder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
// extends defaults
|
||||
if (!tokenFiltersSettings.containsKey("arabicStem")) {
|
||||
tokenFilterBinder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("brazilianStem")) {
|
||||
tokenFilterBinder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("dutchStem")) {
|
||||
tokenFilterBinder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("frenchStem")) {
|
||||
tokenFilterBinder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("germanStem")) {
|
||||
tokenFilterBinder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenFiltersSettings.containsKey("russianStem")) {
|
||||
tokenFilterBinder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
|
||||
for (AnalysisBinderProcessor processor : processors) {
|
||||
processor.processTokenFilters(tokenFilterBinder, tokenFiltersSettings);
|
||||
}
|
||||
|
||||
MapBinder<String, TokenizerFactoryFactory> tokenizerBinder
|
||||
|
@ -115,29 +95,10 @@ public class AnalysisModule extends AbstractModule {
|
|||
}
|
||||
tokenizerBinder.addBinding(tokenizerName).toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, type)).in(Scopes.SINGLETON);
|
||||
}
|
||||
// add defaults
|
||||
if (!tokenizersSettings.containsKey("standard")) {
|
||||
tokenizerBinder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, StandardTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenizersSettings.containsKey("keyword")) {
|
||||
tokenizerBinder.addBinding("keyword").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, KeywordTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenizersSettings.containsKey("letter")) {
|
||||
tokenizerBinder.addBinding("letter").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LetterTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenizersSettings.containsKey("lowercase")) {
|
||||
tokenizerBinder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LowerCaseTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenizersSettings.containsKey("whitespace")) {
|
||||
tokenizerBinder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, WhitespaceTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenizersSettings.containsKey("nGram")) {
|
||||
tokenizerBinder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!tokenizersSettings.containsKey("edgeNGram")) {
|
||||
tokenizerBinder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
|
||||
for (AnalysisBinderProcessor processor : processors) {
|
||||
processor.processTokenizers(tokenizerBinder, tokenizersSettings);
|
||||
}
|
||||
|
||||
MapBinder<String, AnalyzerProviderFactory> analyzerBinder
|
||||
= MapBinder.newMapBinder(binder(), String.class, AnalyzerProviderFactory.class);
|
||||
|
@ -160,6 +121,184 @@ public class AnalysisModule extends AbstractModule {
|
|||
analyzerBinder.addBinding(analyzerName).toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, type)).in(Scopes.SINGLETON);
|
||||
}
|
||||
|
||||
for (AnalysisBinderProcessor processor : processors) {
|
||||
processor.processAnalyzers(analyzerBinder, analyzersSettings);
|
||||
}
|
||||
|
||||
bind(AnalysisService.class).in(Scopes.SINGLETON);
|
||||
}
|
||||
|
||||
private static class DefaultProcessor implements AnalysisBinderProcessor {
|
||||
@Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
|
||||
// add defaults
|
||||
if (!groupSettings.containsKey("stop")) {
|
||||
binder.addBinding("stop").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StopTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("asciifolding")) {
|
||||
binder.addBinding("asciifolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ASCIIFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("length")) {
|
||||
binder.addBinding("length").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LengthTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("lowercase")) {
|
||||
binder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LowerCaseTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("porterStem")) {
|
||||
binder.addBinding("porterStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("porter_stem")) {
|
||||
binder.addBinding("porter_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("standard")) {
|
||||
binder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StandardTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("nGram")) {
|
||||
binder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("ngram")) {
|
||||
binder.addBinding("ngram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("edgeNGram")) {
|
||||
binder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("edge_ngram")) {
|
||||
binder.addBinding("edge_ngram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("shingle")) {
|
||||
binder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
|
||||
// add defaults
|
||||
if (!groupSettings.containsKey("standard")) {
|
||||
binder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, StandardTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("keyword")) {
|
||||
binder.addBinding("keyword").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, KeywordTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("letter")) {
|
||||
binder.addBinding("letter").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LetterTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("lowercase")) {
|
||||
binder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LowerCaseTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("whitespace")) {
|
||||
binder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, WhitespaceTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
|
||||
if (!groupSettings.containsKey("standard")) {
|
||||
binder.addBinding("standard").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, StandardAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("simple")) {
|
||||
binder.addBinding("simple").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, SimpleAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("stop")) {
|
||||
binder.addBinding("stop").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, StopAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("whitespace")) {
|
||||
binder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, WhitespaceAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("keyword")) {
|
||||
binder.addBinding("keyword").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, KeywordAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class ExtendedProcessor implements AnalysisBinderProcessor {
|
||||
@Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
|
||||
if (!groupSettings.containsKey("arabicStem")) {
|
||||
binder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("arabic_stem")) {
|
||||
binder.addBinding("arabic_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("brazilianStem")) {
|
||||
binder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("brazilian_stem")) {
|
||||
binder.addBinding("brazilian_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("dutchStem")) {
|
||||
binder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("dutch_stem")) {
|
||||
binder.addBinding("dutch_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("frenchStem")) {
|
||||
binder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("french_stem")) {
|
||||
binder.addBinding("french_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("germanStem")) {
|
||||
binder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("german_stem")) {
|
||||
binder.addBinding("german_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("russianStem")) {
|
||||
binder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("russian_stem")) {
|
||||
binder.addBinding("russian_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
|
||||
if (!groupSettings.containsKey("nGram")) {
|
||||
binder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("ngram")) {
|
||||
binder.addBinding("ngram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("edgeNGram")) {
|
||||
binder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("edge_ngram")) {
|
||||
binder.addBinding("edge_ngram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
|
||||
if (!groupSettings.containsKey("arabic")) {
|
||||
binder.addBinding("arabic").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ArabicAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("brazilian")) {
|
||||
binder.addBinding("brazilian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, BrazilianAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("chinese")) {
|
||||
binder.addBinding("chinese").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ChineseAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("cjk")) {
|
||||
binder.addBinding("cjk").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ChineseAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("czech")) {
|
||||
binder.addBinding("czech").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, CzechAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("dutch")) {
|
||||
binder.addBinding("dutch").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, DutchAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("french")) {
|
||||
binder.addBinding("french").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, FrenchAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("german")) {
|
||||
binder.addBinding("german").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, GermanAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("greek")) {
|
||||
binder.addBinding("greek").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, GreekAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("persian")) {
|
||||
binder.addBinding("persian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, PersianAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("russian")) {
|
||||
binder.addBinding("russian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, RussianAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("thai")) {
|
||||
binder.addBinding("thai").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ThaiAnalyzerProvider.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -74,22 +74,6 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
}
|
||||
}
|
||||
|
||||
// add some defaults
|
||||
if (!analyzerProviders.containsKey("standard")) {
|
||||
analyzerProviders.put("standard", new StandardAnalyzerProvider(index, indexSettings, "standard", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("simple")) {
|
||||
analyzerProviders.put("simple", new SimpleAnalyzerProvider(index, indexSettings, "simple", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("stop")) {
|
||||
analyzerProviders.put("stop", new StopAnalyzerProvider(index, indexSettings, "stop", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("whitespace")) {
|
||||
analyzerProviders.put("whitespace", new WhitespaceAnalyzerProvider(index, indexSettings, "whitespace", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("keyword")) {
|
||||
analyzerProviders.put("keyword", new KeywordAnalyzerProvider(index, indexSettings, "keyword", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("default")) {
|
||||
analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, "default", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
|
@ -100,45 +84,6 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
analyzerProviders.put("default_search", analyzerProviders.get("default"));
|
||||
}
|
||||
|
||||
// extended analyzers defaults
|
||||
if (!analyzerProviders.containsKey("arabic")) {
|
||||
analyzerProviders.put("arabic", new ArabicAnalyzerProvider(index, indexSettings, "arabic", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("brazilian")) {
|
||||
analyzerProviders.put("brazilian", new BrazilianAnalyzerProvider(index, indexSettings, "brazilian", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("chinese")) {
|
||||
analyzerProviders.put("chinese", new ChineseAnalyzerProvider(index, indexSettings, "chinese", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("cjk")) {
|
||||
analyzerProviders.put("cjk", new ChineseAnalyzerProvider(index, indexSettings, "cjk", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("czech")) {
|
||||
analyzerProviders.put("czech", new CzechAnalyzerProvider(index, indexSettings, "czech", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("dutch")) {
|
||||
analyzerProviders.put("dutch", new DutchAnalyzerProvider(index, indexSettings, "dutch", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("french")) {
|
||||
analyzerProviders.put("french", new FrenchAnalyzerProvider(index, indexSettings, "french", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("german")) {
|
||||
analyzerProviders.put("german", new GermanAnalyzerProvider(index, indexSettings, "german", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("greek")) {
|
||||
analyzerProviders.put("greek", new GreekAnalyzerProvider(index, indexSettings, "greek", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("persian")) {
|
||||
analyzerProviders.put("persian", new PersianAnalyzerProvider(index, indexSettings, "persian", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("russian")) {
|
||||
analyzerProviders.put("russian", new RussianAnalyzerProvider(index, indexSettings, "russian", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("thai")) {
|
||||
analyzerProviders.put("thai", new ThaiAnalyzerProvider(index, indexSettings, "thai", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
|
||||
|
||||
this.analyzerProviders = ImmutableMap.copyOf(analyzerProviders);
|
||||
|
||||
Map<String, NamedAnalyzer> analyzers = newHashMap();
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.index.service;
|
||||
|
||||
import org.elasticsearch.util.gcommon.collect.ImmutableMap;
|
||||
import org.elasticsearch.util.gcommon.collect.Lists;
|
||||
import org.elasticsearch.util.gcommon.collect.UnmodifiableIterator;
|
||||
import org.elasticsearch.util.guice.inject.Inject;
|
||||
import org.elasticsearch.util.guice.inject.Injector;
|
||||
|
@ -56,9 +57,11 @@ import org.elasticsearch.plugins.PluginsService;
|
|||
import org.elasticsearch.plugins.ShardsPluginsModule;
|
||||
import org.elasticsearch.util.component.CloseableIndexComponent;
|
||||
import org.elasticsearch.util.guice.Injectors;
|
||||
import org.elasticsearch.util.guice.inject.Module;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -189,16 +192,20 @@ public class InternalIndexService extends AbstractIndexComponent implements Inde
|
|||
|
||||
logger.debug("Creating shard_id[{}]", shardId.id());
|
||||
|
||||
Injector shardInjector = injector.createChildInjector(
|
||||
new ShardsPluginsModule(indexSettings, pluginsService),
|
||||
new IndexShardModule(shardId),
|
||||
new StoreModule(indexSettings),
|
||||
new DeletionPolicyModule(indexSettings),
|
||||
new MergePolicyModule(indexSettings),
|
||||
new MergeSchedulerModule(indexSettings),
|
||||
new TranslogModule(indexSettings),
|
||||
new EngineModule(indexSettings),
|
||||
new IndexShardGatewayModule(injector.getInstance(IndexGateway.class)));
|
||||
List<Module> modules = Lists.newArrayList();
|
||||
modules.add(new ShardsPluginsModule(indexSettings, pluginsService));
|
||||
modules.add(new IndexShardModule(shardId));
|
||||
modules.add(new StoreModule(indexSettings));
|
||||
modules.add(new DeletionPolicyModule(indexSettings));
|
||||
modules.add(new MergePolicyModule(indexSettings));
|
||||
modules.add(new MergeSchedulerModule(indexSettings));
|
||||
modules.add(new TranslogModule(indexSettings));
|
||||
modules.add(new EngineModule(indexSettings));
|
||||
modules.add(new IndexShardGatewayModule(injector.getInstance(IndexGateway.class)));
|
||||
|
||||
pluginsService.processModules(modules);
|
||||
|
||||
Injector shardInjector = injector.createChildInjector(modules);
|
||||
|
||||
shardsInjectors = newMapBuilder(shardsInjectors).put(shardId.id(), shardInjector).immutableMap();
|
||||
|
||||
|
|
|
@ -47,8 +47,10 @@ import org.elasticsearch.util.component.AbstractLifecycleComponent;
|
|||
import org.elasticsearch.util.component.CloseableIndexComponent;
|
||||
import org.elasticsearch.util.concurrent.ThreadSafe;
|
||||
import org.elasticsearch.util.guice.Injectors;
|
||||
import org.elasticsearch.util.guice.inject.Module;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -167,19 +169,23 @@ public class InternalIndicesService extends AbstractLifecycleComponent<IndicesSe
|
|||
.globalSettings(settings.getGlobalSettings())
|
||||
.build();
|
||||
|
||||
Injector indexInjector = injector.createChildInjector(
|
||||
new IndexNameModule(index),
|
||||
new LocalNodeIdModule(localNodeId),
|
||||
new IndexSettingsModule(indexSettings),
|
||||
new IndicesPluginsModule(indexSettings, pluginsService),
|
||||
new AnalysisModule(indexSettings),
|
||||
new SimilarityModule(indexSettings),
|
||||
new IndexCacheModule(indexSettings),
|
||||
new IndexQueryParserModule(indexSettings),
|
||||
new MapperServiceModule(),
|
||||
new IndexGatewayModule(indexSettings, injector.getInstance(Gateway.class)),
|
||||
new OperationRoutingModule(indexSettings),
|
||||
new IndexModule());
|
||||
ArrayList<Module> modules = new ArrayList<Module>();
|
||||
modules.add(new IndexNameModule(index));
|
||||
modules.add(new LocalNodeIdModule(localNodeId));
|
||||
modules.add(new IndexSettingsModule(indexSettings));
|
||||
modules.add(new IndicesPluginsModule(indexSettings, pluginsService));
|
||||
modules.add(new AnalysisModule(indexSettings));
|
||||
modules.add(new SimilarityModule(indexSettings));
|
||||
modules.add(new IndexCacheModule(indexSettings));
|
||||
modules.add(new IndexQueryParserModule(indexSettings));
|
||||
modules.add(new MapperServiceModule());
|
||||
modules.add(new IndexGatewayModule(indexSettings, injector.getInstance(Gateway.class)));
|
||||
modules.add(new OperationRoutingModule(indexSettings));
|
||||
modules.add(new IndexModule());
|
||||
|
||||
pluginsService.processModules(modules);
|
||||
|
||||
Injector indexInjector = injector.createChildInjector(modules);
|
||||
|
||||
indicesInjectors.put(index.name(), indexInjector);
|
||||
|
||||
|
|
|
@ -131,6 +131,7 @@ public final class InternalNode implements Node {
|
|||
modules.add(new GatewayModule(settings));
|
||||
modules.add(new NodeClientModule());
|
||||
|
||||
pluginsService.processModules(modules);
|
||||
|
||||
injector = Guice.createInjector(modules);
|
||||
|
||||
|
|
|
@ -74,4 +74,8 @@ public abstract class AbstractPlugin implements Plugin {
|
|||
@Override public Collection<Class<? extends CloseableIndexComponent>> shardServices() {
|
||||
return ImmutableList.of();
|
||||
}
|
||||
|
||||
/**
 * Default no-op implementation of the module processing hook. Plugins that need to
 * customize a module before the injector is created override this method.
 *
 * @param module the module about to be handed to the injector
 */
@Override public void processModule(Module module) {
|
||||
// nothing to do here
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,4 +71,6 @@ public interface Plugin {
|
|||
* Per index shard service that will be automatically closed.
|
||||
*/
|
||||
Collection<Class<? extends CloseableIndexComponent>> shardServices();
|
||||
|
||||
/**
 * Hook that lets the plugin inspect or customize a module instance before the
 * injector is created from it.
 *
 * @param module the module about to be handed to the injector
 */
void processModule(Module module);
|
||||
}
|
||||
|
|
|
@ -70,6 +70,14 @@ public class PluginsService extends AbstractComponent {
|
|||
return this.settings;
|
||||
}
|
||||
|
||||
public void processModules(Iterable<Module> modules) {
|
||||
for (Module module : modules) {
|
||||
for (Plugin plugin : plugins.values()) {
|
||||
plugin.processModule(module);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<Class<? extends Module>> modules() {
|
||||
List<Class<? extends Module>> modules = Lists.newArrayList();
|
||||
for (Plugin plugin : plugins.values()) {
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.util.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
// TODO Lucene Monitor: Once 3.1 is out, no need for this class anymore, use CharTermAttribute
|
||||
public class CharSequenceTermAttribute implements CharSequence {
|
||||
|
||||
private final TermAttribute termAtt;
|
||||
|
||||
public CharSequenceTermAttribute(TermAttribute termAtt) {
|
||||
this.termAtt = termAtt;
|
||||
}
|
||||
|
||||
@Override public int length() {
|
||||
return termAtt.termLength();
|
||||
}
|
||||
|
||||
@Override public char charAt(int index) {
|
||||
if (index >= length())
|
||||
throw new IndexOutOfBoundsException();
|
||||
return termAtt.termBuffer()[index];
|
||||
}
|
||||
|
||||
@Override public CharSequence subSequence(int start, int end) {
|
||||
if (start > length() || end > length())
|
||||
throw new IndexOutOfBoundsException();
|
||||
return new String(termAtt.termBuffer(), start, end - start);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
dependsOn(':elasticsearch')
|
||||
|
||||
apply plugin: 'java'
|
||||
apply plugin: 'maven'
|
||||
|
||||
archivesBaseName = "elasticsearch-analysis-icu"
|
||||
|
||||
explodedDistDir = new File(distsDir, 'exploded')
|
||||
|
||||
manifest.mainAttributes("Implementation-Title": "ElasticSearch::Plugins::Analysis::ICU", "Implementation-Version": rootProject.version, "Implementation-Date": buildTimeStr)
|
||||
|
||||
configurations.compile.transitive = true
|
||||
configurations.testCompile.transitive = true
|
||||
|
||||
// no need to use the resource dir
|
||||
sourceSets.main.resources.srcDirs 'src/main/java'
|
||||
sourceSets.test.resources.srcDirs 'src/test/java'
|
||||
|
||||
// add the source files to the dist jar
|
||||
//jar {
|
||||
// from sourceSets.main.allJava
|
||||
//}
|
||||
|
||||
configurations {
|
||||
dists
|
||||
distLib {
|
||||
visible = false
|
||||
transitive = false
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
compile project(':elasticsearch')
|
||||
|
||||
compile('com.ibm.icu:icu4j:4.4') { transitive = false }
|
||||
distLib('com.ibm.icu:icu4j:4.4') { transitive = false }
|
||||
compile('org.apache.lucene:lucene-collation:3.0.1') { transitive = false }
|
||||
distLib('org.apache.lucene:lucene-collation:3.0.1') { transitive = false }
|
||||
|
||||
|
||||
testCompile project(':test-testng')
|
||||
testCompile('org.testng:testng:5.10:jdk15') { transitive = false }
|
||||
testCompile 'org.hamcrest:hamcrest-all:1.1'
|
||||
}
|
||||
|
||||
// Test task configuration: run the TestNG suite with assertions enabled and a 1g heap.
test {
    useTestNG()
    // was misspelled "jmvArgs", which never reached the test JVM
    jvmArgs = ["-ea", "-Xmx1024m"]
    suiteName = project.name
    listeners = ["org.elasticsearch.util.testng.Listeners"]
    // allow overriding the test logging configuration from the command line
    systemProperties["es.test.log.conf"] = System.getProperty("es.test.log.conf", "log4j-gradle.properties")
}
|
||||
|
||||
task explodedDist(dependsOn: [jar], description: 'Builds the plugin zip file') << {
|
||||
[explodedDistDir]*.mkdirs()
|
||||
|
||||
copy {
|
||||
from configurations.distLib
|
||||
into explodedDistDir
|
||||
}
|
||||
|
||||
// remove elasticsearch files (compile above adds the elasticsearch one)
|
||||
ant.delete { fileset(dir: explodedDistDir, includes: "elasticsearch-*.jar") }
|
||||
|
||||
copy {
|
||||
from libsDir
|
||||
into explodedDistDir
|
||||
}
|
||||
|
||||
ant.delete { fileset(dir: explodedDistDir, includes: "elasticsearch-*-javadoc.jar") }
|
||||
ant.delete { fileset(dir: explodedDistDir, includes: "elasticsearch-*-sources.jar") }
|
||||
}
|
||||
|
||||
task zip(type: Zip, dependsOn: ['explodedDist']) {
|
||||
from(explodedDistDir) {
|
||||
}
|
||||
}
|
||||
|
||||
task release(dependsOn: [zip]) << {
|
||||
ant.delete(dir: explodedDistDir)
|
||||
copy {
|
||||
from distsDir
|
||||
into(new File(rootProject.distsDir, "plugins"))
|
||||
}
|
||||
}
|
||||
|
||||
configurations {
|
||||
deployerJars
|
||||
}
|
||||
|
||||
dependencies {
|
||||
deployerJars "org.apache.maven.wagon:wagon-http:1.0-beta-2"
|
||||
}
|
||||
|
||||
task sourcesJar(type: Jar, dependsOn: classes) {
|
||||
classifier = 'sources'
|
||||
from sourceSets.main.allSource
|
||||
}
|
||||
|
||||
task javadocJar(type: Jar, dependsOn: javadoc) {
|
||||
classifier = 'javadoc'
|
||||
from javadoc.destinationDir
|
||||
}
|
||||
|
||||
artifacts {
|
||||
archives sourcesJar
|
||||
archives javadocJar
|
||||
}
|
||||
|
||||
// Publishes the plugin artifacts (jar, sources, javadoc) to the Maven release or snapshot
// repository configured on the root project.
uploadArchives {
    repositories.mavenDeployer {
        configuration = configurations.deployerJars
        repository(url: rootProject.mavenRepoUrl) {
            authentication(userName: rootProject.mavenRepoUser, password: rootProject.mavenRepoPass)
        }
        snapshotRepository(url: rootProject.mavenSnapshotRepoUrl) {
            authentication(userName: rootProject.mavenRepoUser, password: rootProject.mavenRepoPass)
        }

        pom.project {
            inceptionYear '2009'
            name 'elasticsearch-plugins-analysis-icu'
            // previously read 'Attachments Plugin', copied over from another plugin's build file
            description 'ICU Analysis Plugin for ElasticSearch'
            licenses {
                license {
                    name 'The Apache Software License, Version 2.0'
                    url 'http://www.apache.org/licenses/LICENSE-2.0.txt'
                    distribution 'repo'
                }
            }
            scm {
                connection 'git://github.com/elasticsearch/elasticsearch.git'
                developerConnection 'git@github.com:elasticsearch/elasticsearch.git'
                url 'http://github.com/elasticsearch/elasticsearch'
            }
        }

        pom.whenConfigured {pom ->
            pom.dependencies = pom.dependencies.findAll {dep -> dep.scope != 'test' } // removes the test scoped ones
        }
    }
}
|
|
@ -0,0 +1 @@
|
|||
plugin=org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* A TokenFilter that applies search term folding to Unicode text,
|
||||
* applying foldings from UTR#30 Character Foldings.
|
||||
* <p>
|
||||
* This filter applies the following foldings from the report to unicode text:
|
||||
* <ul>
|
||||
* <li>Accent removal
|
||||
* <li>Case folding
|
||||
* <li>Canonical duplicates folding
|
||||
* <li>Dashes folding
|
||||
* <li>Diacritic removal (including stroke, hook, descender)
|
||||
* <li>Greek letterforms folding
|
||||
* <li>Han Radical folding
|
||||
* <li>Hebrew Alternates folding
|
||||
* <li>Jamo folding
|
||||
* <li>Letterforms folding
|
||||
* <li>Math symbol folding
|
||||
* <li>Multigraph Expansions: All
|
||||
* <li>Native digit folding
|
||||
* <li>No-break folding
|
||||
* <li>Overline folding
|
||||
* <li>Positional forms folding
|
||||
* <li>Small forms folding
|
||||
* <li>Space folding
|
||||
* <li>Spacing Accents folding
|
||||
* <li>Subscript folding
|
||||
* <li>Superscript folding
|
||||
* <li>Suzhou Numeral folding
|
||||
* <li>Symbol folding
|
||||
* <li>Underline folding
|
||||
* <li>Vertical forms folding
|
||||
* <li>Width folding
|
||||
* </ul>
|
||||
* <p>
|
||||
* Additionally, Default Ignorables are removed, and text is normalized to NFKC.
|
||||
* All foldings, case folding, and normalization mappings are applied recursively
|
||||
* to ensure a fully folded and normalized result.
|
||||
* </p>
|
||||
*/
|
||||
public final class ICUFoldingFilter extends ICUNormalizer2Filter {
|
||||
private static final Normalizer2 normalizer = Normalizer2.getInstance(ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"),
|
||||
"utr30", Normalizer2.Mode.COMPOSE);
|
||||
|
||||
/**
|
||||
* Create a new ICUFoldingFilter on the specified input
|
||||
*/
|
||||
public ICUFoldingFilter(TokenStream input) {
|
||||
super(input, normalizer);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.elasticsearch.util.lucene.analysis.CharSequenceTermAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Normalize token text with ICU's {@link com.ibm.icu.text.Normalizer2}
|
||||
* <p>
|
||||
* With this filter, you can normalize text in the following ways:
|
||||
* <ul>
|
||||
* <li> NFKC Normalization, Case Folding, and removing Ignorables (the default)
|
||||
* <li> Using a standard Normalization mode (NFC, NFD, NFKC, NFKD)
|
||||
* <li> Based on rules from a custom normalization mapping.
|
||||
* </ul>
|
||||
* <p>
|
||||
* If you use the defaults, this filter is a simple way to standardize Unicode text
|
||||
* in a language-independent way for search:
|
||||
* <ul>
|
||||
* <li> The case folding that it does can be seen as a replacement for
|
||||
* LowerCaseFilter: For example, it handles cases such as the Greek sigma, so that
|
||||
* "Μάϊος" and "ΜΆΪΟΣ" will match correctly.
|
||||
* <li> The normalization standardizes different forms of the same
|
||||
* character in Unicode. For example, CJK full-width numbers will be standardized
|
||||
* to their ASCII forms.
|
||||
* <li> Ignorables such as Zero-Width Joiner and Variation Selectors are removed.
|
||||
* These are typically modifier characters that affect display.
|
||||
* </ul>
|
||||
*
|
||||
* @see com.ibm.icu.text.Normalizer2
|
||||
* @see com.ibm.icu.text.FilteredNormalizer2
|
||||
*/
|
||||
// TODO Lucene Monitor: Once 3.1 is released use it instead
|
||||
public class ICUNormalizer2Filter extends TokenFilter {
|
||||
|
||||
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
|
||||
|
||||
private final Normalizer2 normalizer;
|
||||
|
||||
private final StringBuilder buffer = new StringBuilder();
|
||||
|
||||
private final CharSequenceTermAttribute charSequenceTermAtt;
|
||||
|
||||
/**
|
||||
* Create a new Normalizer2Filter that combines NFKC normalization, Case
|
||||
* Folding, and removes Default Ignorables (NFKC_Casefold)
|
||||
*/
|
||||
public ICUNormalizer2Filter(TokenStream input) {
|
||||
this(input, Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Normalizer2Filter with the specified Normalizer2
|
||||
*
|
||||
* @param input stream
|
||||
* @param normalizer normalizer to use
|
||||
*/
|
||||
public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) {
|
||||
super(input);
|
||||
this.normalizer = normalizer;
|
||||
this.charSequenceTermAtt = new CharSequenceTermAttribute(termAtt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
if (normalizer.quickCheck(charSequenceTermAtt) != Normalizer.YES) {
|
||||
buffer.setLength(0);
|
||||
normalizer.normalize(charSequenceTermAtt, buffer);
|
||||
termAtt.setTermBuffer(buffer.toString());
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.util.guice.inject.Scopes;
|
||||
import org.elasticsearch.util.guice.inject.assistedinject.FactoryProvider;
|
||||
import org.elasticsearch.util.guice.inject.multibindings.MapBinder;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class IcuAnalysisBinderProcessor implements AnalysisModule.AnalysisBinderProcessor {
|
||||
|
||||
@Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
|
||||
if (!groupSettings.containsKey("icuNormalizer")) {
|
||||
binder.addBinding("icuNormalizer").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuNormalizerTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("icu_normalizer")) {
|
||||
binder.addBinding("icu_normalizer").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuNormalizerTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
|
||||
if (!groupSettings.containsKey("icuFolding")) {
|
||||
binder.addBinding("icuFolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("icu_folding")) {
|
||||
binder.addBinding("icu_folding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
|
||||
if (!groupSettings.containsKey("icuCollation")) {
|
||||
binder.addBinding("icuCollation").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuCollationTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
if (!groupSettings.containsKey("icu_collation")) {
|
||||
binder.addBinding("icu_collation").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuCollationTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
|
||||
}
|
||||
|
||||
@Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
|
||||
}
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.collation.ICUCollationKeyFilter;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.env.FailedToResolveConfigException;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.elasticsearch.util.guice.inject.Inject;
|
||||
import org.elasticsearch.util.guice.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* An ICU based collation token filter. There are two ways to configure collation:
|
||||
*
|
||||
* <p>The first is simply specifying the locale (defaults to the default locale). The <tt>language</tt>
|
||||
* parameter is the lowercase two-letter ISO-639 code. An additional <tt>country</tt> and <tt>variant</tt>
|
||||
* can be provided.
|
||||
*
|
||||
* <p>The second option is to specify collation rules as defined in the <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
|
||||
* Collation customization</a> chapter in icu docs. The <tt>rules</tt> parameter can either embed the rules definition
|
||||
* in the settings or refer to an external location (preferably located under the <tt>config</tt> location, relative to it).
|
||||
*
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final Collator collator;
|
||||
|
||||
@Inject public IcuCollationTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment environment, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
|
||||
Collator collator;
|
||||
String rules = settings.get("rules");
|
||||
if (rules != null) {
|
||||
FailedToResolveConfigException failureToResolve = null;
|
||||
try {
|
||||
rules = environment.resolveConfigAndLoadToString(rules);
|
||||
} catch (FailedToResolveConfigException e) {
|
||||
failureToResolve = e;
|
||||
} catch (IOException e) {
|
||||
throw new ElasticSearchIllegalArgumentException("Failed to load collation rules", e);
|
||||
}
|
||||
try {
|
||||
collator = new RuleBasedCollator(rules);
|
||||
} catch (Exception e) {
|
||||
if (failureToResolve != null) {
|
||||
throw new ElasticSearchIllegalArgumentException("Failed to resolve collation rules location", failureToResolve);
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("Failed to parse collation rules", e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
String language = settings.get("language");
|
||||
if (language != null) {
|
||||
Locale locale;
|
||||
String country = settings.get("country");
|
||||
if (country != null) {
|
||||
String variant = settings.get("variant");
|
||||
if (variant != null) {
|
||||
locale = new Locale(language, country, variant);
|
||||
} else {
|
||||
locale = new Locale(language, country);
|
||||
}
|
||||
} else {
|
||||
locale = new Locale(language);
|
||||
}
|
||||
collator = Collator.getInstance(locale);
|
||||
} else {
|
||||
collator = Collator.getInstance();
|
||||
}
|
||||
}
|
||||
this.collator = collator;
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new ICUCollationKeyFilter(tokenStream, collator);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.elasticsearch.util.guice.inject.Inject;
|
||||
import org.elasticsearch.util.guice.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public IcuFoldingTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new ICUFoldingFilter(tokenStream);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.elasticsearch.util.guice.inject.Inject;
|
||||
import org.elasticsearch.util.guice.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
|
||||
/**
|
||||
* Uses the {@link org.elasticsearch.index.analysis.ICUNormalizer2Filter} to normalize tokens.
|
||||
*
|
||||
* <p>The <tt>name</tt> can be used to provide the type of normalization to perform.
|
||||
*
|
||||
* @author kimchy (shay.banon)
|
||||
* @see org.elasticsearch.index.analysis.ICUNormalizer2Filter
|
||||
*/
|
||||
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final String name;
|
||||
|
||||
@Inject public IcuNormalizerTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
this.name = settings.get("name", "nfkc_cf");
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new ICUNormalizer2Filter(tokenStream, Normalizer2.getInstance(null, name, Normalizer2.Mode.COMPOSE));
|
||||
}
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.analysis.icu;
|
||||
|
||||
import org.elasticsearch.index.analysis.AnalysisModule;
|
||||
import org.elasticsearch.index.analysis.IcuAnalysisBinderProcessor;
|
||||
import org.elasticsearch.plugins.AbstractPlugin;
|
||||
import org.elasticsearch.util.guice.inject.Module;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class AnalysisICUPlugin extends AbstractPlugin {
|
||||
|
||||
@Override public String name() {
|
||||
return "analysis-icu";
|
||||
}
|
||||
|
||||
@Override public String description() {
|
||||
return "UTF related ICU analysis support";
|
||||
}
|
||||
|
||||
@Override public void processModule(Module module) {
|
||||
if (module instanceof AnalysisModule) {
|
||||
AnalysisModule analysisModule = (AnalysisModule) module;
|
||||
analysisModule.addProcessor(new IcuAnalysisBinderProcessor());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.text.Normalizer;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class Normalizer2Tests {
|
||||
|
||||
@Test public void testNormalizer2() {
|
||||
Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
|
||||
MatcherAssert.assertThat(normalizer.normalize("Jordania"), Matchers.equalTo("jordania"));
|
||||
MatcherAssert.assertThat(normalizer.normalize("João"), Matchers.equalTo("joão"));
|
||||
|
||||
MatcherAssert.assertThat(Normalizer.normalize("Jordania", Normalizer.Form.NFKC), Matchers.equalTo("Jordania"));
|
||||
MatcherAssert.assertThat(Normalizer.normalize("João", Normalizer.Form.NFKC), Matchers.equalTo("João"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.IndexNameModule;
|
||||
import org.elasticsearch.index.settings.IndexSettingsModule;
|
||||
import org.elasticsearch.util.guice.inject.Guice;
|
||||
import org.elasticsearch.util.guice.inject.Injector;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.elasticsearch.util.settings.ImmutableSettings.Builder.*;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class SimpleIcuAnalysisTests {
|
||||
|
||||
@Test public void testDefaultsIcuAnalysis() {
|
||||
Index index = new Index("test");
|
||||
Injector injector = Guice.createInjector(
|
||||
new IndexSettingsModule(EMPTY_SETTINGS),
|
||||
new IndexNameModule(index),
|
||||
new AnalysisModule(EMPTY_SETTINGS).addProcessor(new IcuAnalysisBinderProcessor()));
|
||||
|
||||
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
|
||||
|
||||
TokenFilterFactory filterFactory = analysisService.tokenFilter("icu_normalizer");
|
||||
MatcherAssert.assertThat(filterFactory, instanceOf(IcuNormalizerTokenFilterFactory.class));
|
||||
}
|
||||
}
|
|
@ -117,7 +117,7 @@ uploadArchives {
|
|||
|
||||
pom.project {
|
||||
inceptionYear '2009'
|
||||
name 'elasticsearch-plugins-attachments'
|
||||
name 'elasticsearch-plugins-mapper-attachments'
|
||||
description 'Attachments Plugin for ElasticSearch'
|
||||
licenses {
|
||||
license {
|
||||
|
|
|
@ -1 +1 @@
|
|||
plugin=org.elasticsearch.plugin.attachments.AttachmentsPlugin
|
||||
plugin=org.elasticsearch.plugin.attachments.MapperAttachmentsPlugin
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.elasticsearch.plugin.attachments.index.mapper.JsonAttachmentMapperSer
|
|||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class AttachmentsIndexModule extends AbstractModule {
|
||||
public class MapperAttachmentsIndexModule extends AbstractModule {
|
||||
|
||||
@Override protected void configure() {
|
||||
bind(JsonAttachmentMapperService.class).asEagerSingleton();
|
|
@ -29,7 +29,7 @@ import static org.elasticsearch.util.gcommon.collect.Lists.*;
|
|||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class AttachmentsPlugin extends AbstractPlugin {
|
||||
public class MapperAttachmentsPlugin extends AbstractPlugin {
|
||||
|
||||
@Override public String name() {
|
||||
return "mapper-attachments";
|
||||
|
@ -41,7 +41,7 @@ public class AttachmentsPlugin extends AbstractPlugin {
|
|||
|
||||
@Override public Collection<Class<? extends Module>> indexModules() {
|
||||
Collection<Class<? extends Module>> modules = newArrayList();
|
||||
modules.add(AttachmentsIndexModule.class);
|
||||
modules.add(MapperAttachmentsIndexModule.class);
|
||||
return modules;
|
||||
}
|
||||
}
|
|
@ -6,6 +6,7 @@ include 'test-integration'
|
|||
|
||||
include 'benchmark-micro'
|
||||
|
||||
include 'plugins-analysis-icu'
|
||||
include 'plugins-mapper-attachments'
|
||||
include 'plugins-client-groovy'
|
||||
include 'plugins-transport-memcached'
|
||||
|
|
Loading…
Reference in New Issue