Analysis ICU Plugin, closes #151

2010-04-27 23:54:30 +03:00 · 2010-04-27 23:54:30 +03:00 · 11e4ad9bd6
parent dfd002bf98
commit 11e4ad9bd6
32 changed files with 1097 additions and 152 deletions
--- a/.idea/compiler.xml
+++ b/.idea/compiler.xml
@ -21,6 +21,7 @@
      <entry name="?*.yml" />
      <entry name="?*.txt" />
      <entry name="?*.pdf" />
+      <entry name="?*.nrm" />
    </wildcardResourcePatterns>
    <annotationProcessing enabled="false" useClasspath="true" />
  </component>
--- a/.idea/dictionaries/kimchy.xml
+++ b/.idea/dictionaries/kimchy.xml
@ -69,6 +69,7 @@
      <w>throwable</w>
      <w>tika</w>
      <w>timestamp</w>
+      <w>tokenizers</w>
      <w>translog</w>
      <w>traslog</w>
      <w>trie</w>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -5,6 +5,7 @@
      <module fileurl="file://$PROJECT_DIR$/.idea/modules//benchmark-micro.iml" filepath="$PROJECT_DIR$/.idea/modules//benchmark-micro.iml" />
      <module fileurl="file://$PROJECT_DIR$/.idea/modules/elasticsearch.iml" filepath="$PROJECT_DIR$/.idea/modules/elasticsearch.iml" />
      <module fileurl="file://$PROJECT_DIR$/.idea/modules/elasticsearch-root.iml" filepath="$PROJECT_DIR$/.idea/modules/elasticsearch-root.iml" />
+      <module fileurl="file://$PROJECT_DIR$/.idea/modules/plugin-analysis-icu.iml" filepath="$PROJECT_DIR$/.idea/modules/plugin-analysis-icu.iml" />
      <module fileurl="file://$PROJECT_DIR$/.idea/modules//plugin-client-groovy.iml" filepath="$PROJECT_DIR$/.idea/modules//plugin-client-groovy.iml" />
      <module fileurl="file://$PROJECT_DIR$/.idea/modules//plugin-discovery-jgroups.iml" filepath="$PROJECT_DIR$/.idea/modules//plugin-discovery-jgroups.iml" />
      <module fileurl="file://$PROJECT_DIR$/.idea/modules//plugin-mapper-attachments.iml" filepath="$PROJECT_DIR$/.idea/modules//plugin-mapper-attachments.iml" />
--- a/.idea/modules/elasticsearch-root.iml
+++ b/.idea/modules/elasticsearch-root.iml
@ -17,6 +17,7 @@
    <orderEntry type="module" module-name="plugin-discovery-jgroups" />
    <orderEntry type="module" module-name="plugin-mapper-attachments" />
    <orderEntry type="module" module-name="plugin-transport-memcached" />
+    <orderEntry type="module" module-name="plugin-analysis-icu" />
    <orderEntry type="module" module-name="test-integration" />
  </component>
 </module>
--- a/.idea/modules/plugin-analysis-icu.iml
+++ b/.idea/modules/plugin-analysis-icu.iml
@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/../../plugins/analysis/icu/build/classes/main" />
+    <output-test url="file://$MODULE_DIR$/../../plugins/analysis/icu/build/classes/test" />
+    <exclude-output />
+    <content url="file://$MODULE_DIR$/../../plugins/analysis/icu">
+      <sourceFolder url="file://$MODULE_DIR$/../../plugins/analysis/icu/src/main/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/../../plugins/analysis/icu/src/test/java" isTestSource="true" />
+      <excludeFolder url="file://$MODULE_DIR$/../../plugins/analysis/icu/build" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module" module-name="elasticsearch" />
+    <orderEntry type="module-library">
+      <library name="icu4j">
+        <CLASSES>
+          <root url="jar://$GRADLE_REPOSITORY$/com.ibm.icu/icu4j/jars/icu4j-4.4.jar!/" />
+        </CLASSES>
+        <JAVADOC />
+        <SOURCES>
+          <!-- NOTE(review): every root below climbs five levels above $MODULE_DIR$ (three levels above the project directory used by the output/content URLs in this file) into an opt/icu4j/4.4 checkout. Presumably these only resolve on the machine that generated this module file; confirm before relying on the source attachment. -->
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/charset/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/localespi/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/translit/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/langdata/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/collate/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/charset/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/collate/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/translit/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/core/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/core/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/regiondata/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/packaging/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/tests/framework/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/currdata/src" />
+          <root url="file://$MODULE_DIR$/../../../../../opt/icu4j/4.4/main/classes/localespi/src" />
+        </SOURCES>
+      </library>
+    </orderEntry>
+    <orderEntry type="module" module-name="test-testng" />
+    <orderEntry type="library" name="testng" level="project" />
+    <orderEntry type="library" name="hamcrest" level="project" />
+    <orderEntry type="module-library">
+      <library name="lucene-collation">
+        <CLASSES>
+          <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-collation/jars/lucene-collation-3.0.1.jar!/" />
+        </CLASSES>
+        <JAVADOC />
+        <SOURCES />
+      </library>
+    </orderEntry>
+  </component>
+</module>
+
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/env/Environment.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/env/Environment.java
@ -21,9 +21,13 @@ package org.elasticsearch.env;

 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.util.Classes;
+import org.elasticsearch.util.io.Streams;
 import org.elasticsearch.util.settings.Settings;

 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;

@ -122,6 +126,10 @@ public class Environment {
        return logsFile;
    }

+    /**
+     * Resolves the given config path through {@link #resolveConfig(String)}, opens the resulting
+     * URL and returns what {@code Streams.copyToString} reads from it, decoding the bytes as UTF-8.
+     *
+     * @param path the config location, resolved exactly as {@link #resolveConfig(String)} resolves it
+     * @return the text read from the resolved resource
+     * @throws FailedToResolveConfigException declared by {@link #resolveConfig(String)}; presumably raised when the path cannot be resolved
+     * @throws IOException if the resolved URL cannot be opened or read
+     */
+    public String resolveConfigAndLoadToString(String path) throws FailedToResolveConfigException, IOException {
+        // NOTE(review): nothing in this method closes the opened stream; presumably
+        // Streams.copyToString closes the reader it is given - confirm.
+        return Streams.copyToString(new InputStreamReader(resolveConfig(path).openStream(), "UTF-8"));
+    }
+
    public URL resolveConfig(String path) throws FailedToResolveConfigException {
        // first, try it as a path on the file system
        File f1 = new File(path);
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java
@ -19,23 +19,46 @@

 package org.elasticsearch.index.analysis;

+import org.elasticsearch.util.gcommon.collect.Lists;
 import org.elasticsearch.util.guice.inject.AbstractModule;
 import org.elasticsearch.util.guice.inject.Scopes;
 import org.elasticsearch.util.guice.inject.assistedinject.FactoryProvider;
 import org.elasticsearch.util.guice.inject.multibindings.MapBinder;
 import org.elasticsearch.util.settings.Settings;

+import java.util.List;
 import java.util.Map;

 /**
- * @author kimchy (Shay Banon)
+ * @author kimchy (shay.banon)
 */
 public class AnalysisModule extends AbstractModule {

+    /**
+     * Callback through which extra analysis bindings are contributed to this module. While the
+     * module is being configured, each registered processor is handed the token filter, tokenizer
+     * and analyzer binders in turn, after the bindings declared in the settings have been added.
+     *
+     * <p>In every method, {@code binder} is the map binder to add bindings to and
+     * {@code groupSettings} is the settings map the module used for that component kind
+     * (presumably keyed by the configured component name - TODO confirm). The processors in this
+     * file check {@code groupSettings.containsKey(name)} before binding a name.
+     */
+    public static interface AnalysisBinderProcessor {
+        /** Contributes token filter factory bindings. */
+        void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings);
+
+        /** Contributes tokenizer factory bindings. */
+        void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings);
+
+        /** Contributes analyzer provider bindings. */
+        void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings);
+    }
+
    private final Settings settings;

+    private final List<AnalysisBinderProcessor> processors = Lists.newArrayList();
+
    public AnalysisModule(Settings settings) {
        this.settings = settings;
+        // The built-in processor is always registered, and always first in the list.
+        processors.add(new DefaultProcessor());
+        try {
+            processors.add(new ExtendedProcessor());
+        } catch (Throwable t) {
+            // Deliberately ignored: the module then runs with the default processor only.
+            // NOTE(review): presumably this guards against the extended analysis classes being
+            // unavailable at runtime; confirm that constructing ExtendedProcessor is where that
+            // failure would actually surface.
+        }
+    }
+
+    /**
+     * Appends a processor after the ones registered by the constructor. Processors are consulted
+     * when the module is configured, so only those added before that point take part.
+     *
+     * @param processor the processor to append
+     * @return this module, to allow chained calls
+     */
+    public AnalysisModule addProcessor(AnalysisBinderProcessor processor) {
+        processors.add(processor);
+        return this;
    }

    @Override protected void configure() {
@ -53,52 +76,9 @@ public class AnalysisModule extends AbstractModule {
            }
            tokenFilterBinder.addBinding(tokenFilterName).toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, type)).in(Scopes.SINGLETON);
        }
-        // add defaults
-        if (!tokenFiltersSettings.containsKey("stop")) {
-            tokenFilterBinder.addBinding("stop").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StopTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("asciifolding")) {
-            tokenFilterBinder.addBinding("asciifolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ASCIIFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("length")) {
-            tokenFilterBinder.addBinding("length").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LengthTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("lowercase")) {
-            tokenFilterBinder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LowerCaseTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("porterStem")) {
-            tokenFilterBinder.addBinding("porterStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("standard")) {
-            tokenFilterBinder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StandardTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("nGram")) {
-            tokenFilterBinder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("edgeNGram")) {
-            tokenFilterBinder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("shingle")) {
-            tokenFilterBinder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        // extends defaults
-        if (!tokenFiltersSettings.containsKey("arabicStem")) {
-            tokenFilterBinder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("brazilianStem")) {
-            tokenFilterBinder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("dutchStem")) {
-            tokenFilterBinder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("frenchStem")) {
-            tokenFilterBinder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("germanStem")) {
-            tokenFilterBinder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenFiltersSettings.containsKey("russianStem")) {
-            tokenFilterBinder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+
+        for (AnalysisBinderProcessor processor : processors) {
+            processor.processTokenFilters(tokenFilterBinder, tokenFiltersSettings);
        }

        MapBinder<String, TokenizerFactoryFactory> tokenizerBinder
@ -115,29 +95,10 @@ public class AnalysisModule extends AbstractModule {
            }
            tokenizerBinder.addBinding(tokenizerName).toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, type)).in(Scopes.SINGLETON);
        }
-        // add defaults
-        if (!tokenizersSettings.containsKey("standard")) {
-            tokenizerBinder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, StandardTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenizersSettings.containsKey("keyword")) {
-            tokenizerBinder.addBinding("keyword").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, KeywordTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenizersSettings.containsKey("letter")) {
-            tokenizerBinder.addBinding("letter").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LetterTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenizersSettings.containsKey("lowercase")) {
-            tokenizerBinder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LowerCaseTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenizersSettings.containsKey("whitespace")) {
-            tokenizerBinder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, WhitespaceTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenizersSettings.containsKey("nGram")) {
-            tokenizerBinder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }
-        if (!tokenizersSettings.containsKey("edgeNGram")) {
-            tokenizerBinder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
-        }

+        for (AnalysisBinderProcessor processor : processors) {
+            processor.processTokenizers(tokenizerBinder, tokenizersSettings);
+        }

        MapBinder<String, AnalyzerProviderFactory> analyzerBinder
                = MapBinder.newMapBinder(binder(), String.class, AnalyzerProviderFactory.class);
@ -160,6 +121,184 @@ public class AnalysisModule extends AbstractModule {
            analyzerBinder.addBinding(analyzerName).toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, type)).in(Scopes.SINGLETON);
        }

+        for (AnalysisBinderProcessor processor : processors) {
+            processor.processAnalyzers(analyzerBinder, analyzersSettings);
+        }
+
        bind(AnalysisService.class).in(Scopes.SINGLETON);
    }
+
+    /**
+     * Registers the built-in token filters, tokenizers and analyzers. Every binding is guarded by
+     * a {@code containsKey} check, so a name already present in the supplied settings map is not
+     * bound again here. Each binding is created through {@code FactoryProvider.newFactory} and
+     * scoped as a singleton.
+     */
+    private static class DefaultProcessor implements AnalysisBinderProcessor {
+        /**
+         * Binds the default token filters. Several of them are bound under two spellings, a
+         * camelCase name and a lower-case one (porterStem/porter_stem, nGram/ngram,
+         * edgeNGram/edge_ngram), both pointing at the same factory class.
+         */
+        @Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
+            // add defaults
+            if (!groupSettings.containsKey("stop")) {
+                binder.addBinding("stop").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StopTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("asciifolding")) {
+                binder.addBinding("asciifolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ASCIIFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("length")) {
+                binder.addBinding("length").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LengthTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("lowercase")) {
+                binder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LowerCaseTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("porterStem")) {
+                binder.addBinding("porterStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("porter_stem")) {
+                binder.addBinding("porter_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("standard")) {
+                binder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StandardTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("nGram")) {
+                binder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("ngram")) {
+                binder.addBinding("ngram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("edgeNGram")) {
+                binder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("edge_ngram")) {
+                binder.addBinding("edge_ngram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("shingle")) {
+                binder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+        }
+
+        /** Binds the default tokenizers: standard, keyword, letter, lowercase and whitespace. */
+        @Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
+            // add defaults
+            if (!groupSettings.containsKey("standard")) {
+                binder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, StandardTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("keyword")) {
+                binder.addBinding("keyword").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, KeywordTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("letter")) {
+                binder.addBinding("letter").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LetterTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("lowercase")) {
+                binder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LowerCaseTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("whitespace")) {
+                binder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, WhitespaceTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+        }
+
+        /** Binds the default analyzers: standard, simple, stop, whitespace and keyword. */
+        @Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
+            if (!groupSettings.containsKey("standard")) {
+                binder.addBinding("standard").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, StandardAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("simple")) {
+                binder.addBinding("simple").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, SimpleAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("stop")) {
+                binder.addBinding("stop").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, StopAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("whitespace")) {
+                binder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, WhitespaceAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("keyword")) {
+                binder.addBinding("keyword").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, KeywordAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+        }
+    }
+
+    /**
+     * Registers the language-specific stem token filters, the n-gram tokenizers and the
+     * language analyzers. Every binding is guarded by a {@code containsKey} check, so a name
+     * already present in the supplied settings map is not bound again here. The module
+     * constructor registers this processor inside a try/catch and carries on without it if
+     * construction fails.
+     */
+    private static class ExtendedProcessor implements AnalysisBinderProcessor {
+        /**
+         * Binds the stem token filters. Each one is bound under a camelCase name and an
+         * underscore name (for example arabicStem/arabic_stem), both using the same factory class.
+         */
+        @Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
+            if (!groupSettings.containsKey("arabicStem")) {
+                binder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("arabic_stem")) {
+                binder.addBinding("arabic_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("brazilianStem")) {
+                binder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("brazilian_stem")) {
+                binder.addBinding("brazilian_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("dutchStem")) {
+                binder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("dutch_stem")) {
+                binder.addBinding("dutch_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("frenchStem")) {
+                binder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("french_stem")) {
+                binder.addBinding("french_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("germanStem")) {
+                binder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("german_stem")) {
+                binder.addBinding("german_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("russianStem")) {
+                binder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("russian_stem")) {
+                binder.addBinding("russian_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
+            }
+        }
+
+        /**
+         * Binds the n-gram and edge n-gram tokenizers, each under two spellings.
+         */
+        @Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
+            // NOTE(review): the n-gram tokenizers are registered here, whereas the n-gram token
+            // filters are registered by DefaultProcessor; confirm the split is intentional.
+            if (!groupSettings.containsKey("nGram")) {
+                binder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("ngram")) {
+                binder.addBinding("ngram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("edgeNGram")) {
+                binder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("edge_ngram")) {
+                binder.addBinding("edge_ngram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
+            }
+        }
+
+        /** Binds the language analyzers under their lower-case language names. */
+        @Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
+            if (!groupSettings.containsKey("arabic")) {
+                binder.addBinding("arabic").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ArabicAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("brazilian")) {
+                binder.addBinding("brazilian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, BrazilianAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("chinese")) {
+                binder.addBinding("chinese").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ChineseAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            // NOTE(review): "cjk" is bound to ChineseAnalyzerProvider, the same provider class as
+            // "chinese" above; confirm whether a dedicated CJK provider was intended here.
+            if (!groupSettings.containsKey("cjk")) {
+                binder.addBinding("cjk").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ChineseAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("czech")) {
+                binder.addBinding("czech").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, CzechAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("dutch")) {
+                binder.addBinding("dutch").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, DutchAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("french")) {
+                binder.addBinding("french").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, FrenchAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("german")) {
+                binder.addBinding("german").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, GermanAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("greek")) {
+                binder.addBinding("greek").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, GreekAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("persian")) {
+                binder.addBinding("persian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, PersianAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("russian")) {
+                binder.addBinding("russian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, RussianAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+            if (!groupSettings.containsKey("thai")) {
+                binder.addBinding("thai").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ThaiAnalyzerProvider.class)).in(Scopes.SINGLETON);
+            }
+        }
+    }
 }
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java
@ -74,22 +74,6 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
            }
        }

-        // add some defaults
-        if (!analyzerProviders.containsKey("standard")) {
-            analyzerProviders.put("standard", new StandardAnalyzerProvider(index, indexSettings, "standard", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("simple")) {
-            analyzerProviders.put("simple", new SimpleAnalyzerProvider(index, indexSettings, "simple", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("stop")) {
-            analyzerProviders.put("stop", new StopAnalyzerProvider(index, indexSettings, "stop", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("whitespace")) {
-            analyzerProviders.put("whitespace", new WhitespaceAnalyzerProvider(index, indexSettings, "whitespace", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("keyword")) {
-            analyzerProviders.put("keyword", new KeywordAnalyzerProvider(index, indexSettings, "keyword", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
        if (!analyzerProviders.containsKey("default")) {
            analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, "default", ImmutableSettings.Builder.EMPTY_SETTINGS));
        }
@ -100,45 +84,6 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
            analyzerProviders.put("default_search", analyzerProviders.get("default"));
        }

-        // extended analyzers defaults
-        if (!analyzerProviders.containsKey("arabic")) {
-            analyzerProviders.put("arabic", new ArabicAnalyzerProvider(index, indexSettings, "arabic", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("brazilian")) {
-            analyzerProviders.put("brazilian", new BrazilianAnalyzerProvider(index, indexSettings, "brazilian", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("chinese")) {
-            analyzerProviders.put("chinese", new ChineseAnalyzerProvider(index, indexSettings, "chinese", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("cjk")) {
-            analyzerProviders.put("cjk", new ChineseAnalyzerProvider(index, indexSettings, "cjk", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("czech")) {
-            analyzerProviders.put("czech", new CzechAnalyzerProvider(index, indexSettings, "czech", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("dutch")) {
-            analyzerProviders.put("dutch", new DutchAnalyzerProvider(index, indexSettings, "dutch", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("french")) {
-            analyzerProviders.put("french", new FrenchAnalyzerProvider(index, indexSettings, "french", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("german")) {
-            analyzerProviders.put("german", new GermanAnalyzerProvider(index, indexSettings, "german", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("greek")) {
-            analyzerProviders.put("greek", new GreekAnalyzerProvider(index, indexSettings, "greek", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("persian")) {
-            analyzerProviders.put("persian", new PersianAnalyzerProvider(index, indexSettings, "persian", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("russian")) {
-            analyzerProviders.put("russian", new RussianAnalyzerProvider(index, indexSettings, "russian", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("thai")) {
-            analyzerProviders.put("thai", new ThaiAnalyzerProvider(index, indexSettings, "thai", ImmutableSettings.Builder.EMPTY_SETTINGS));
-        }
-
-
        this.analyzerProviders = ImmutableMap.copyOf(analyzerProviders);

        Map<String, NamedAnalyzer> analyzers = newHashMap();
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/service/InternalIndexService.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/service/InternalIndexService.java
@ -20,6 +20,7 @@
 package org.elasticsearch.index.service;

 import org.elasticsearch.util.gcommon.collect.ImmutableMap;
+import org.elasticsearch.util.gcommon.collect.Lists;
 import org.elasticsearch.util.gcommon.collect.UnmodifiableIterator;
 import org.elasticsearch.util.guice.inject.Inject;
 import org.elasticsearch.util.guice.inject.Injector;
@ -56,9 +57,11 @@ import org.elasticsearch.plugins.PluginsService;
 import org.elasticsearch.plugins.ShardsPluginsModule;
 import org.elasticsearch.util.component.CloseableIndexComponent;
 import org.elasticsearch.util.guice.Injectors;
+import org.elasticsearch.util.guice.inject.Module;
 import org.elasticsearch.util.settings.Settings;

 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;

@ -189,16 +192,20 @@ public class InternalIndexService extends AbstractIndexComponent implements Inde

        logger.debug("Creating shard_id[{}]", shardId.id());

-        Injector shardInjector = injector.createChildInjector(
-                new ShardsPluginsModule(indexSettings, pluginsService),
-                new IndexShardModule(shardId),
-                new StoreModule(indexSettings),
-                new DeletionPolicyModule(indexSettings),
-                new MergePolicyModule(indexSettings),
-                new MergeSchedulerModule(indexSettings),
-                new TranslogModule(indexSettings),
-                new EngineModule(indexSettings),
-                new IndexShardGatewayModule(injector.getInstance(IndexGateway.class)));
+        List<Module> modules = Lists.newArrayList();
+        modules.add(new ShardsPluginsModule(indexSettings, pluginsService));
+        modules.add(new IndexShardModule(shardId));
+        modules.add(new StoreModule(indexSettings));
+        modules.add(new DeletionPolicyModule(indexSettings));
+        modules.add(new MergePolicyModule(indexSettings));
+        modules.add(new MergeSchedulerModule(indexSettings));
+        modules.add(new TranslogModule(indexSettings));
+        modules.add(new EngineModule(indexSettings));
+        modules.add(new IndexShardGatewayModule(injector.getInstance(IndexGateway.class)));
+
+        pluginsService.processModules(modules);
+
+        Injector shardInjector = injector.createChildInjector(modules);

        shardsInjectors = newMapBuilder(shardsInjectors).put(shardId.id(), shardInjector).immutableMap();

--- a/modules/elasticsearch/src/main/java/org/elasticsearch/indices/InternalIndicesService.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/indices/InternalIndicesService.java
@ -47,8 +47,10 @@ import org.elasticsearch.util.component.AbstractLifecycleComponent;
 import org.elasticsearch.util.component.CloseableIndexComponent;
 import org.elasticsearch.util.concurrent.ThreadSafe;
 import org.elasticsearch.util.guice.Injectors;
+import org.elasticsearch.util.guice.inject.Module;
 import org.elasticsearch.util.settings.Settings;

+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
@ -167,19 +169,23 @@ public class InternalIndicesService extends AbstractLifecycleComponent<IndicesSe
                .globalSettings(settings.getGlobalSettings())
                .build();

-        Injector indexInjector = injector.createChildInjector(
-                new IndexNameModule(index),
-                new LocalNodeIdModule(localNodeId),
-                new IndexSettingsModule(indexSettings),
-                new IndicesPluginsModule(indexSettings, pluginsService),
-                new AnalysisModule(indexSettings),
-                new SimilarityModule(indexSettings),
-                new IndexCacheModule(indexSettings),
-                new IndexQueryParserModule(indexSettings),
-                new MapperServiceModule(),
-                new IndexGatewayModule(indexSettings, injector.getInstance(Gateway.class)),
-                new OperationRoutingModule(indexSettings),
-                new IndexModule());
+        ArrayList<Module> modules = new ArrayList<Module>();
+        modules.add(new IndexNameModule(index));
+        modules.add(new LocalNodeIdModule(localNodeId));
+        modules.add(new IndexSettingsModule(indexSettings));
+        modules.add(new IndicesPluginsModule(indexSettings, pluginsService));
+        modules.add(new AnalysisModule(indexSettings));
+        modules.add(new SimilarityModule(indexSettings));
+        modules.add(new IndexCacheModule(indexSettings));
+        modules.add(new IndexQueryParserModule(indexSettings));
+        modules.add(new MapperServiceModule());
+        modules.add(new IndexGatewayModule(indexSettings, injector.getInstance(Gateway.class)));
+        modules.add(new OperationRoutingModule(indexSettings));
+        modules.add(new IndexModule());
+
+        pluginsService.processModules(modules);
+
+        Injector indexInjector = injector.createChildInjector(modules);

        indicesInjectors.put(index.name(), indexInjector);

--- a/modules/elasticsearch/src/main/java/org/elasticsearch/node/internal/InternalNode.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/node/internal/InternalNode.java
@ -131,6 +131,7 @@ public final class InternalNode implements Node {
        modules.add(new GatewayModule(settings));
        modules.add(new NodeClientModule());

+        pluginsService.processModules(modules);

        injector = Guice.createInjector(modules);

--- a/modules/elasticsearch/src/main/java/org/elasticsearch/plugins/AbstractPlugin.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/plugins/AbstractPlugin.java
@ -74,4 +74,8 @@ public abstract class AbstractPlugin implements Plugin {
    @Override public Collection<Class<? extends CloseableIndexComponent>> shardServices() {
        return ImmutableList.of();
    }
+
+    /**
+     * Default implementation: leaves the given module untouched.
+     *
+     * @param module the module offered to this plugin
+     */
+    @Override public void processModule(Module module) {
+        // nothing to do here
+    }
 }
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/plugins/Plugin.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/plugins/Plugin.java
@ -71,4 +71,6 @@ public interface Plugin {
     * Per index shard service that will be automatically closed.
     */
    Collection<Class<? extends CloseableIndexComponent>> shardServices();
+
+    /**
+     * Callback that is handed a module. <tt>PluginsService#processModules</tt> invokes it
+     * once per module for every registered plugin (presumably so that a plugin can
+     * customize the module before it is used - confirm against the callers).
+     *
+     * @param module the module offered to this plugin
+     */
+    void processModule(Module module);
 }
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/plugins/PluginsService.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/plugins/PluginsService.java
@ -70,6 +70,14 @@ public class PluginsService extends AbstractComponent {
        return this.settings;
    }

+    /**
+     * Offers every module in <tt>modules</tt> to every registered plugin by calling
+     * {@link Plugin#processModule(Module)}. Modules are visited in iteration order, and
+     * for each module all plugins are called before moving on to the next module.
+     *
+     * @param modules the modules to hand to the plugins
+     */
+    public void processModules(Iterable<Module> modules) {
+        for (Module module : modules) {
+            for (Plugin plugin : plugins.values()) {
+                plugin.processModule(module);
+            }
+        }
+    }
+
    public Collection<Class<? extends Module>> modules() {
        List<Class<? extends Module>> modules = Lists.newArrayList();
        for (Plugin plugin : plugins.values()) {
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/analysis/CharSequenceTermAttribute.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/analysis/CharSequenceTermAttribute.java
@ -0,0 +1,51 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.util.lucene.analysis;
+
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+/**
+ * A live {@link CharSequence} view over the term held by a {@link TermAttribute}.
+ * No characters are copied on construction: every call reads the attribute's current
+ * term buffer and term length, so the view always reflects the current token.
+ *
+ * @author kimchy (shay.banon)
+ */
+// TODO Lucene Monitor: Once 3.1 is out, no need for this class anymore, use CharTermAttribute
+public class CharSequenceTermAttribute implements CharSequence {
+
+    private final TermAttribute termAtt;
+
+    /**
+     * Creates a view over the given attribute.
+     *
+     * @param termAtt the attribute whose term text is exposed
+     */
+    public CharSequenceTermAttribute(TermAttribute termAtt) {
+        this.termAtt = termAtt;
+    }
+
+    /**
+     * @return the length of the current term
+     */
+    @Override public int length() {
+        return termAtt.termLength();
+    }
+
+    /**
+     * @param index position within the current term
+     * @return the character at <tt>index</tt>
+     * @throws IndexOutOfBoundsException if <tt>index</tt> is negative or not less than {@link #length()}
+     */
+    @Override public char charAt(int index) {
+        // the backing buffer may be longer than the term, so check against the term length
+        if (index < 0 || index >= length())
+            throw new IndexOutOfBoundsException();
+        return termAtt.termBuffer()[index];
+    }
+
+    /**
+     * @param start start index, inclusive
+     * @param end   end index, exclusive
+     * @return a copy of the requested range of the current term
+     * @throws IndexOutOfBoundsException if the range is negative, inverted, or extends past {@link #length()}
+     */
+    @Override public CharSequence subSequence(int start, int end) {
+        if (start < 0 || end < start || end > length())
+            throw new IndexOutOfBoundsException();
+        return new String(termAtt.termBuffer(), start, end - start);
+    }
+
+    /**
+     * Returns the current term text, as required by the {@link CharSequence} contract
+     * (the inherited {@link Object#toString()} would return an identity string instead).
+     */
+    @Override public String toString() {
+        return new String(termAtt.termBuffer(), 0, termAtt.termLength());
+    }
+}
--- a/plugins/analysis/icu/build.gradle
+++ b/plugins/analysis/icu/build.gradle
@ -0,0 +1,142 @@
+dependsOn(':elasticsearch')
+
+apply plugin: 'java'
+apply plugin: 'maven'
+
+archivesBaseName = "elasticsearch-analysis-icu"
+
+explodedDistDir = new File(distsDir, 'exploded')
+
+manifest.mainAttributes("Implementation-Title": "ElasticSearch::Plugins::Analysis::ICU", "Implementation-Version": rootProject.version, "Implementation-Date": buildTimeStr)
+
+configurations.compile.transitive = true
+configurations.testCompile.transitive = true
+
+// no need to use the resource dir
+sourceSets.main.resources.srcDirs 'src/main/java'
+sourceSets.test.resources.srcDirs 'src/test/java'
+
+// add the source files to the dist jar
+//jar {
+//    from sourceSets.main.allJava
+//}
+
+configurations {
+    dists
+    distLib {
+        visible = false
+        transitive = false
+    }
+}
+
+dependencies {
+    compile project(':elasticsearch')
+
+    compile('com.ibm.icu:icu4j:4.4') { transitive = false }
+    distLib('com.ibm.icu:icu4j:4.4') { transitive = false }
+    compile('org.apache.lucene:lucene-collation:3.0.1') { transitive = false }
+    distLib('org.apache.lucene:lucene-collation:3.0.1') { transitive = false }
+
+
+    testCompile project(':test-testng')
+    testCompile('org.testng:testng:5.10:jdk15') { transitive = false }
+    testCompile 'org.hamcrest:hamcrest-all:1.1'
+}
+
+// Test task configuration: TestNG with the project's listeners and log4j configuration.
+test {
+    useTestNG()
+    // JVM arguments for the test run: enable assertions and set a 1GB max heap
+    jvmArgs = ["-ea", "-Xmx1024m"]
+    suiteName = project.name
+    listeners = ["org.elasticsearch.util.testng.Listeners"]
+    // allow overriding the test logging configuration from the command line
+    systemProperties["es.test.log.conf"] = System.getProperty("es.test.log.conf", "log4j-gradle.properties")
+}
+
+// Assembles the plugin's files (distLib dependencies plus the jars from libsDir) into explodedDistDir.
+task explodedDist(dependsOn: [jar], description: 'Builds the plugin zip file') << {
+    [explodedDistDir]*.mkdirs()
+
+    // third party libraries that ship with the plugin
+    copy {
+        from configurations.distLib
+        into explodedDistDir
+    }
+
+    // remove elasticsearch files (compile above adds the elasticsearch one)
+    ant.delete { fileset(dir: explodedDistDir, includes: "elasticsearch-*.jar") }
+
+    // everything built into libsDir
+    copy {
+        from libsDir
+        into explodedDistDir
+    }
+
+    // javadoc and sources jars are not part of the distribution
+    ant.delete { fileset(dir: explodedDistDir, includes: "elasticsearch-*-javadoc.jar") }
+    ant.delete { fileset(dir: explodedDistDir, includes: "elasticsearch-*-sources.jar") }
+}
+
+task zip(type: Zip, dependsOn: ['explodedDist']) {
+    from(explodedDistDir) {
+    }
+}
+
+// After the zip is built: drops the exploded directory and copies what is left in distsDir
+// into the root project's "plugins" dists directory.
+task release(dependsOn: [zip]) << {
+    // delete the exploded dir first so it is not copied along
+    ant.delete(dir: explodedDistDir)
+    copy {
+        from distsDir
+        into(new File(rootProject.distsDir, "plugins"))
+    }
+}
+
+configurations {
+    deployerJars
+}
+
+dependencies {
+    deployerJars "org.apache.maven.wagon:wagon-http:1.0-beta-2"
+}
+
+task sourcesJar(type: Jar, dependsOn: classes) {
+    classifier = 'sources'
+    from sourceSets.main.allSource
+}
+
+task javadocJar(type: Jar, dependsOn: javadoc) {
+    classifier = 'javadoc'
+    from javadoc.destinationDir
+}
+
+artifacts {
+    archives sourcesJar
+    archives javadocJar
+}
+
+uploadArchives {
+    repositories.mavenDeployer {
+        configuration = configurations.deployerJars
+        repository(url: rootProject.mavenRepoUrl) {
+            authentication(userName: rootProject.mavenRepoUser, password: rootProject.mavenRepoPass)
+        }
+        snapshotRepository(url: rootProject.mavenSnapshotRepoUrl) {
+            authentication(userName: rootProject.mavenRepoUser, password: rootProject.mavenRepoPass)
+        }
+
+        // metadata written into the generated pom
+        pom.project {
+            inceptionYear '2009'
+            name 'elasticsearch-plugins-analysis-icu'
+            description 'Analysis ICU Plugin for ElasticSearch'
+            licenses {
+                license {
+                    name 'The Apache Software License, Version 2.0'
+                    url 'http://www.apache.org/licenses/LICENSE-2.0.txt'
+                    distribution 'repo'
+                }
+            }
+            scm {
+                connection 'git://github.com/elasticsearch/elasticsearch.git'
+                developerConnection 'git@github.com:elasticsearch/elasticsearch.git'
+                url 'http://github.com/elasticsearch/elasticsearch'
+            }
+        }
+
+        pom.whenConfigured {pom ->
+            pom.dependencies = pom.dependencies.findAll {dep -> dep.scope != 'test' } // removes the test scoped ones
+        }
+    }
+}
--- a/plugins/analysis/icu/src/main/java/es-plugin.properties
+++ b/plugins/analysis/icu/src/main/java/es-plugin.properties
@ -0,0 +1 @@
+plugin=org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/ICUFoldingFilter.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/ICUFoldingFilter.java
@ -0,0 +1,74 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.ibm.icu.text.Normalizer2;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * A TokenFilter that applies search term folding to Unicode text,
+ * applying foldings from UTR#30 Character Foldings.
+ * <p>
+ * This filter applies the following foldings from the report to unicode text:
+ * <ul>
+ * <li>Accent removal
+ * <li>Case folding
+ * <li>Canonical duplicates folding
+ * <li>Dashes folding
+ * <li>Diacritic removal (including stroke, hook, descender)
+ * <li>Greek letterforms folding
+ * <li>Han Radical folding
+ * <li>Hebrew Alternates folding
+ * <li>Jamo folding
+ * <li>Letterforms folding
+ * <li>Math symbol folding
+ * <li>Multigraph Expansions: All
+ * <li>Native digit folding
+ * <li>No-break folding
+ * <li>Overline folding
+ * <li>Positional forms folding
+ * <li>Small forms folding
+ * <li>Space folding
+ * <li>Spacing Accents folding
+ * <li>Subscript folding
+ * <li>Superscript folding
+ * <li>Suzhou Numeral folding
+ * <li>Symbol folding
+ * <li>Underline folding
+ * <li>Vertical forms folding
+ * <li>Width folding
+ * </ul>
+ * <p>
+ * Additionally, Default Ignorables are removed, and text is normalized to NFKC.
+ * All foldings, case folding, and normalization mappings are applied recursively
+ * to ensure a fully folded and normalized result.
+ * </p>
+ */
+public final class ICUFoldingFilter extends ICUNormalizer2Filter {
+
+    /**
+     * Normalizer built once from the <tt>utr30.nrm</tt> resource located next to this class
+     * and shared by all filter instances.
+     */
+    private static final Normalizer2 UTR30_FOLDING;
+
+    static {
+        UTR30_FOLDING = Normalizer2.getInstance(
+                ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"), "utr30", Normalizer2.Mode.COMPOSE);
+    }
+
+    /**
+     * Wraps <tt>input</tt> so that its terms are folded with the shared utr30 normalizer.
+     *
+     * @param input the token stream to fold
+     */
+    public ICUFoldingFilter(TokenStream input) {
+        super(input, UTR30_FOLDING);
+    }
+}
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/ICUNormalizer2Filter.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/ICUNormalizer2Filter.java
@ -0,0 +1,101 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.Normalizer2;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.elasticsearch.util.lucene.analysis.CharSequenceTermAttribute;
+
+import java.io.IOException;
+
+/**
+ * Normalize token text with ICU's {@link com.ibm.icu.text.Normalizer2}
+ * <p>
+ * With this filter, you can normalize text in the following ways:
+ * <ul>
+ * <li> NFKC Normalization, Case Folding, and removing Ignorables (the default)
+ * <li> Using a standard Normalization mode (NFC, NFD, NFKC, NFKD)
+ * <li> Based on rules from a custom normalization mapping.
+ * </ul>
+ * <p>
+ * If you use the defaults, this filter is a simple way to standardize Unicode text
+ * in a language-independent way for search:
+ * <ul>
+ * <li> The case folding that it does can be seen as a replacement for
+ * LowerCaseFilter: For example, it handles cases such as the Greek sigma, so that
+ * "Μάϊος" and "ΜΆΪΟΣ" will match correctly.
+ * <li> The normalization will standardize different forms of the same
+ * character in Unicode. For example, CJK full-width numbers will be standardized
+ * to their ASCII forms.
+ * <li> Ignorables such as Zero-Width Joiner and Variation Selectors are removed.
+ * These are typically modifier characters that affect display.
+ * </ul>
+ *
+ * @see com.ibm.icu.text.Normalizer2
+ * @see com.ibm.icu.text.FilteredNormalizer2
+ */
+// TODO Lucene Monitor: Once 3.1 is released use it instead
+public class ICUNormalizer2Filter extends TokenFilter {
+
+    /** Term text of the current token; overwritten when the token needs normalizing. */
+    private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+
+    /** CharSequence view over {@link #termAtt}, used to hand the term to the normalizer. */
+    private final CharSequenceTermAttribute charSequenceTermAtt = new CharSequenceTermAttribute(termAtt);
+
+    /** Reusable scratch space receiving the normalized form of a term. */
+    private final StringBuilder scratch = new StringBuilder();
+
+    private final Normalizer2 normalizer;
+
+    /**
+     * Creates a filter using the <tt>nfkc_cf</tt> normalizer (NFKC normalization, case
+     * folding and removal of Default Ignorables).
+     *
+     * @param input stream
+     */
+    public ICUNormalizer2Filter(TokenStream input) {
+        this(input, Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
+    }
+
+    /**
+     * Creates a filter that normalizes terms with the supplied Normalizer2.
+     *
+     * @param input      stream
+     * @param normalizer normalizer to use
+     */
+    public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) {
+        super(input);
+        this.normalizer = normalizer;
+    }
+
+    /**
+     * Advances the wrapped stream and rewrites the term unless the normalizer's quick
+     * check answers YES for it.
+     *
+     * @return <tt>false</tt> once the wrapped stream is exhausted, <tt>true</tt> otherwise
+     */
+    @Override
+    public final boolean incrementToken() throws IOException {
+        if (!input.incrementToken()) {
+            return false;
+        }
+        if (normalizer.quickCheck(charSequenceTermAtt) == Normalizer.YES) {
+            // quick check passed, keep the term as is
+            return true;
+        }
+        scratch.setLength(0);
+        normalizer.normalize(charSequenceTermAtt, scratch);
+        termAtt.setTermBuffer(scratch.toString());
+        return true;
+    }
+}
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuAnalysisBinderProcessor.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuAnalysisBinderProcessor.java
@ -0,0 +1,62 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.util.guice.inject.Scopes;
+import org.elasticsearch.util.guice.inject.assistedinject.FactoryProvider;
+import org.elasticsearch.util.guice.inject.multibindings.MapBinder;
+import org.elasticsearch.util.settings.Settings;
+
+import java.util.Map;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class IcuAnalysisBinderProcessor implements AnalysisModule.AnalysisBinderProcessor {
+
+    /**
+     * Registers the ICU token filter factories under both their camelCase and
+     * underscore names, skipping any name already present in <tt>groupSettings</tt>.
+     */
+    @Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
+        bindUnlessConfigured(binder, groupSettings, "icuNormalizer", IcuNormalizerTokenFilterFactory.class);
+        bindUnlessConfigured(binder, groupSettings, "icu_normalizer", IcuNormalizerTokenFilterFactory.class);
+
+        bindUnlessConfigured(binder, groupSettings, "icuFolding", IcuFoldingTokenFilterFactory.class);
+        bindUnlessConfigured(binder, groupSettings, "icu_folding", IcuFoldingTokenFilterFactory.class);
+
+        bindUnlessConfigured(binder, groupSettings, "icuCollation", IcuCollationTokenFilterFactory.class);
+        bindUnlessConfigured(binder, groupSettings, "icu_collation", IcuCollationTokenFilterFactory.class);
+    }
+
+    /**
+     * No tokenizers are registered here.
+     */
+    @Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
+    }
+
+    /**
+     * No analyzers are registered here.
+     */
+    @Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
+    }
+
+    /**
+     * Adds a singleton-scoped binding of <tt>name</tt> to a factory for <tt>factoryClass</tt>,
+     * unless <tt>groupSettings</tt> already contains <tt>name</tt>.
+     */
+    private void bindUnlessConfigured(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings,
+                                      String name, Class<?> factoryClass) {
+        if (groupSettings.containsKey(name)) {
+            return;
+        }
+        binder.addBinding(name).toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, factoryClass)).in(Scopes.SINGLETON);
+    }
+}
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuCollationTokenFilterFactory.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuCollationTokenFilterFactory.java
@ -0,0 +1,104 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.RuleBasedCollator;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.collation.ICUCollationKeyFilter;
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.env.FailedToResolveConfigException;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+import org.elasticsearch.util.guice.inject.Inject;
+import org.elasticsearch.util.guice.inject.assistedinject.Assisted;
+import org.elasticsearch.util.settings.Settings;
+
+import java.io.IOException;
+import java.util.Locale;
+
+/**
+ * An ICU based collation token filter. There are two ways to configure collation:
+ *
+ * <p>The first is simply specifying the locale (defaults to the default locale). The <tt>language</tt>
+ * parameter is the lowercase two-letter ISO-639 code. An additional <tt>country</tt> and <tt>variant</tt>
+ * can be provided.
+ *
+ * <p>The second option is to specify collation rules as defined in the <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
+ * Collation customization</a> chapter in icu docs. The <tt>rules</tt> parameter can either embed the rules definition
+ * in the settings or refer to an external location (preferably located under the <tt>config</tt> location, relative to it).
+ *
+ * @author kimchy (shay.banon)
+ */
+public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {
+
+    private final Collator collator;
+
+    /**
+     * Builds the collator from the <tt>rules</tt> setting when it is present, otherwise
+     * from the <tt>language</tt> / <tt>country</tt> / <tt>variant</tt> settings.
+     */
+    @Inject public IcuCollationTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment environment, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name);
+
+        String rules = settings.get("rules");
+        this.collator = rules != null ? rulesCollator(environment, rules) : localeCollator(settings);
+    }
+
+    @Override public TokenStream create(TokenStream tokenStream) {
+        return new ICUCollationKeyFilter(tokenStream, collator);
+    }
+
+    /**
+     * Creates a rule based collator. <tt>rules</tt> is first tried as a config location to
+     * load; when it cannot be resolved, the value itself is parsed as the rules definition.
+     */
+    private static Collator rulesCollator(Environment environment, String rules) {
+        String definition = rules;
+        FailedToResolveConfigException unresolved = null;
+        try {
+            definition = environment.resolveConfigAndLoadToString(rules);
+        } catch (FailedToResolveConfigException e) {
+            // not a resolvable location, fall back to treating the value as inline rules
+            unresolved = e;
+        } catch (IOException e) {
+            throw new ElasticSearchIllegalArgumentException("Failed to load collation rules", e);
+        }
+        try {
+            return new RuleBasedCollator(definition);
+        } catch (Exception e) {
+            if (unresolved == null) {
+                throw new ElasticSearchIllegalArgumentException("Failed to parse collation rules", e);
+            }
+            // the value was neither a resolvable location nor valid inline rules
+            throw new ElasticSearchIllegalArgumentException("Failed to resolve collation rules location", unresolved);
+        }
+    }
+
+    /**
+     * Creates a collator for the locale described by the settings, or the default
+     * collator when no <tt>language</tt> is set.
+     */
+    private static Collator localeCollator(Settings settings) {
+        String language = settings.get("language");
+        if (language == null) {
+            return Collator.getInstance();
+        }
+        String country = settings.get("country");
+        if (country == null) {
+            return Collator.getInstance(new Locale(language));
+        }
+        String variant = settings.get("variant");
+        Locale locale = variant == null ? new Locale(language, country) : new Locale(language, country, variant);
+        return Collator.getInstance(locale);
+    }
+}
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuFoldingTokenFilterFactory.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuFoldingTokenFilterFactory.java
@ -0,0 +1,42 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+import org.elasticsearch.util.guice.inject.Inject;
+import org.elasticsearch.util.guice.inject.assistedinject.Assisted;
+import org.elasticsearch.util.settings.Settings;
+
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
+
+    /**
+     * Creates the factory. The index, the index settings and the filter name are handed to the
+     * parent class; the per-filter {@code settings} are accepted but not read here.
+     */
+    @Inject public IcuFoldingTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name);
+    }
+
+    /**
+     * Wraps the given stream in an {@link ICUFoldingFilter}.
+     *
+     * @param tokenStream the stream to wrap
+     * @return the folding filter reading from {@code tokenStream}
+     */
+    @Override public TokenStream create(TokenStream tokenStream) {
+        final TokenStream folded = new ICUFoldingFilter(tokenStream);
+        return folded;
+    }
+}
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java
@ -0,0 +1,51 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.ibm.icu.text.Normalizer2;
+import org.apache.lucene.analysis.TokenStream;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+import org.elasticsearch.util.guice.inject.Inject;
+import org.elasticsearch.util.guice.inject.assistedinject.Assisted;
+import org.elasticsearch.util.settings.Settings;
+
+
+/**
+ * Uses the {@link org.elasticsearch.index.analysis.ICUNormalizer2Filter} to normalize tokens.
+ *
+ * <p>The <tt>name</tt> setting can be used to provide the type of normalization to perform.
+ *
+ * @author kimchy (shay.banon)
+ * @see org.elasticsearch.index.analysis.ICUNormalizer2Filter
+ */
+public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory {
+
+    private final String name;
+
+    @Inject public IcuNormalizerTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name);
+        this.name = settings.get("name", "nfkc_cf");
+    }
+
+    @Override public TokenStream create(TokenStream tokenStream) {
+        return new ICUNormalizer2Filter(tokenStream, Normalizer2.getInstance(null, name, Normalizer2.Mode.COMPOSE));
+    }
+}
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/utr30.nrm
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/index/analysis/utr30.nrm
--- a/plugins/analysis/icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java
+++ b/plugins/analysis/icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java
@ -0,0 +1,46 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.plugin.analysis.icu;
+
+import org.elasticsearch.index.analysis.AnalysisModule;
+import org.elasticsearch.index.analysis.IcuAnalysisBinderProcessor;
+import org.elasticsearch.plugins.AbstractPlugin;
+import org.elasticsearch.util.guice.inject.Module;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class AnalysisICUPlugin extends AbstractPlugin {
+
+    /**
+     * @return the name of this plugin
+     */
+    @Override public String name() {
+        return "analysis-icu";
+    }
+
+    /**
+     * @return a short description of this plugin
+     */
+    @Override public String description() {
+        return "UTF related ICU analysis support";
+    }
+
+    /**
+     * Adds an {@link IcuAnalysisBinderProcessor} to the given module when it is an
+     * {@link AnalysisModule}; any other module is left untouched.
+     *
+     * @param module the module being processed
+     */
+    @Override public void processModule(Module module) {
+        if (!(module instanceof AnalysisModule)) {
+            return;
+        }
+        ((AnalysisModule) module).addProcessor(new IcuAnalysisBinderProcessor());
+    }
+}
--- a/plugins/analysis/icu/src/test/java/org/elasticsearch/index/analysis/Normalizer2Tests.java
+++ b/plugins/analysis/icu/src/test/java/org/elasticsearch/index/analysis/Normalizer2Tests.java
@ -0,0 +1,42 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.ibm.icu.text.Normalizer2;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.testng.annotations.Test;
+
+import java.text.Normalizer;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class Normalizer2Tests {
+
+    /**
+     * Compares the ICU <tt>nfkc_cf</tt> normalizer (compose mode) with the JDK NFKC form on the
+     * same inputs: the ICU results are expected in lower case, the JDK results unchanged.
+     */
+    @Test public void testNormalizer2() {
+        final String[] inputs = {"Jordania", "João"};
+        final String[] expectedIcu = {"jordania", "joão"};
+
+        Normalizer2 icuNormalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
+        for (int i = 0; i < inputs.length; i++) {
+            MatcherAssert.assertThat(icuNormalizer.normalize(inputs[i]), Matchers.equalTo(expectedIcu[i]));
+        }
+
+        // The JDK NFKC form is expected to return each input as is.
+        for (String input : inputs) {
+            MatcherAssert.assertThat(Normalizer.normalize(input, Normalizer.Form.NFKC), Matchers.equalTo(input));
+        }
+    }
+}
--- a/plugins/analysis/icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java
+++ b/plugins/analysis/icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java
@ -0,0 +1,50 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.IndexNameModule;
+import org.elasticsearch.index.settings.IndexSettingsModule;
+import org.elasticsearch.util.guice.inject.Guice;
+import org.elasticsearch.util.guice.inject.Injector;
+import org.hamcrest.MatcherAssert;
+import org.testng.annotations.Test;
+
+import static org.elasticsearch.util.settings.ImmutableSettings.Builder.*;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class SimpleIcuAnalysisTests {
+
+    /**
+     * Builds an injector from empty settings plus the ICU binder processor and checks that the
+     * <tt>icu_normalizer</tt> token filter is served by an {@link IcuNormalizerTokenFilterFactory}.
+     */
+    @Test public void testDefaultsIcuAnalysis() {
+        Index testIndex = new Index("test");
+        Injector analysisInjector = Guice.createInjector(
+                new IndexSettingsModule(EMPTY_SETTINGS),
+                new IndexNameModule(testIndex),
+                new AnalysisModule(EMPTY_SETTINGS).addProcessor(new IcuAnalysisBinderProcessor()));
+
+        TokenFilterFactory normalizerFactory = analysisInjector.getInstance(AnalysisService.class).tokenFilter("icu_normalizer");
+        MatcherAssert.assertThat(normalizerFactory, instanceOf(IcuNormalizerTokenFilterFactory.class));
+    }
+}
--- a/plugins/mapper/attachments/build.gradle
+++ b/plugins/mapper/attachments/build.gradle
@ -117,7 +117,7 @@ uploadArchives {

        pom.project {
            inceptionYear '2009'
-            name 'elasticsearch-plugins-attachments'
+            name 'elasticsearch-plugins-mapper-attachments'
            description 'Attachments Plugin for ElasticSearch'
            licenses {
                license {
--- a/plugins/mapper/attachments/src/main/java/es-plugin.properties
+++ b/plugins/mapper/attachments/src/main/java/es-plugin.properties
@ -1 +1 @@
-plugin=org.elasticsearch.plugin.attachments.AttachmentsPlugin
+plugin=org.elasticsearch.plugin.attachments.MapperAttachmentsPlugin
--- a/plugins/mapper/attachments/src/main/java/org/elasticsearch/plugin/attachments/MapperAttachmentsIndexModule.java
+++ b/plugins/mapper/attachments/src/main/java/org/elasticsearch/plugin/attachments/MapperAttachmentsIndexModule.java
@ -25,7 +25,7 @@ import org.elasticsearch.plugin.attachments.index.mapper.JsonAttachmentMapperSer
 /**
 * @author kimchy (shay.banon)
 */
-public class AttachmentsIndexModule extends AbstractModule {
+public class MapperAttachmentsIndexModule extends AbstractModule {

    @Override protected void configure() {
        bind(JsonAttachmentMapperService.class).asEagerSingleton();
--- a/plugins/mapper/attachments/src/main/java/org/elasticsearch/plugin/attachments/MapperAttachmentsPlugin.java
+++ b/plugins/mapper/attachments/src/main/java/org/elasticsearch/plugin/attachments/MapperAttachmentsPlugin.java
@ -29,7 +29,7 @@ import static org.elasticsearch.util.gcommon.collect.Lists.*;
 /**
 * @author kimchy (shay.banon)
 */
-public class AttachmentsPlugin extends AbstractPlugin {
+public class MapperAttachmentsPlugin extends AbstractPlugin {

    @Override public String name() {
        return "mapper-attachments";
@ -41,7 +41,7 @@ public class AttachmentsPlugin extends AbstractPlugin {

    @Override public Collection<Class<? extends Module>> indexModules() {
        Collection<Class<? extends Module>> modules = newArrayList();
-        modules.add(AttachmentsIndexModule.class);
+        modules.add(MapperAttachmentsIndexModule.class);
        return modules;
    }
 }
--- a/settings.gradle
+++ b/settings.gradle
@ -6,6 +6,7 @@ include 'test-integration'

 include 'benchmark-micro'

+include 'plugins-analysis-icu'
 include 'plugins-mapper-attachments'
 include 'plugins-client-groovy'
 include 'plugins-transport-memcached'
				`@ -0,0 +1 @@`
				`plugin=org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin`