diff --git a/README.md b/README.md index 50bb848c982..ce43c102d6f 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ The ICU Analysis plugin integrates Lucene ICU module into elasticsearch, adding In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.0.0`. - --------------------------------------- - | memcached Plugin | ElasticSearch | - --------------------------------------- - | master | 0.18 -> master | - --------------------------------------- - | 1.0.0 | 0.18 -> master | - --------------------------------------- + ---------------------------------------- + | ICU Analysis Plugin | ElasticSearch | + ---------------------------------------- + | master | 0.18 -> master | + ---------------------------------------- + | 1.0.0 | 0.18 -> master | + ---------------------------------------- diff --git a/src/main/java/org/elasticsearch/index/analysis/IcuAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/IcuAnalysisBinderProcessor.java index 111607cbea6..dd4413a580f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/IcuAnalysisBinderProcessor.java +++ b/src/main/java/org/elasticsearch/index/analysis/IcuAnalysisBinderProcessor.java @@ -1,8 +1,8 @@ /* - * Licensed to ElasticSearch and Shay Banon under one + * Licensed to Elastic Search and Shay Banon under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this + * regarding copyright ownership. Elastic Search licenses this * file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at @@ -20,12 +20,16 @@ package org.elasticsearch.index.analysis; /** - * + * @author kimchy (shay.banon) */ public class IcuAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { - @Override - public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { + @Override public void processTokenizers(TokenizersBindings tokenizersBindings) { + tokenizersBindings.processTokenizer("icuTokenizer", IcuTokenizerFactory.class); + tokenizersBindings.processTokenizer("icu_tokenizer", IcuTokenizerFactory.class); + } + + @Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { tokenFiltersBindings.processTokenFilter("icuNormalizer", IcuNormalizerTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("icu_normalizer", IcuNormalizerTokenFilterFactory.class); @@ -34,5 +38,8 @@ public class IcuAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderPro tokenFiltersBindings.processTokenFilter("icuCollation", IcuCollationTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("icu_collation", IcuCollationTokenFilterFactory.class); + + tokenFiltersBindings.processTokenFilter("icuTransform", IcuTransformTokenFilterFactory.class); + tokenFiltersBindings.processTokenFilter("icu_transform", IcuTransformTokenFilterFactory.class); } } diff --git a/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java new file mode 100644 index 00000000000..736ba204046 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import java.io.Reader; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** + * + * @author joerg + */ +public class IcuTokenizerFactory extends AbstractTokenizerFactory { + + @Inject public IcuTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + } + + @Override + public Tokenizer create(Reader reader) { + return new ICUTokenizer(reader); + } + +} diff --git a/src/main/java/org/elasticsearch/index/analysis/IcuTransformTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/IcuTransformTokenFilterFactory.java new file mode 100644 index 00000000000..1f34d02b3bd --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/IcuTransformTokenFilterFactory.java @@ -0,0 +1,52 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.icu.ICUTransformFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import com.ibm.icu.text.Transliterator; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + + +/** + * @author joergprante + */ +public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory { + + private final String id; + private final int dir; + private final Transliterator transliterator; + + @Inject public IcuTransformTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + this.id = settings.get("id", "Null"); + String s = settings.get("dir", "forward"); + this.dir = "forward".equals(s) ? Transliterator.FORWARD : Transliterator.REVERSE; + this.transliterator = Transliterator.getInstance(id, dir); + } + + @Override public TokenStream create(TokenStream tokenStream) { + return new ICUTransformFilter(tokenStream, transliterator); + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java index adcd03a645b..f28f13184dd 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java @@ -1,8 +1,8 @@ /* - * Licensed to ElasticSearch and Shay Banon under one + * Licensed to Elastic Search and Shay Banon under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this + * regarding copyright ownership. Elastic Search licenses this * file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at @@ -32,16 +32,15 @@ import org.elasticsearch.indices.analysis.IndicesAnalysisService; import org.hamcrest.MatcherAssert; import org.testng.annotations.Test; -import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; -import static org.hamcrest.Matchers.instanceOf; +import static org.elasticsearch.common.settings.ImmutableSettings.Builder.*; +import static org.hamcrest.Matchers.*; /** - * + * @author kimchy (shay.banon) */ public class SimpleIcuAnalysisTests { - @Test - public void testDefaultsIcuAnalysis() { + @Test public void testDefaultsIcuAnalysis() { Index index = new Index("test"); Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS), new EnvironmentModule(new Environment(EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector(); @@ -53,7 +52,19 @@ public class SimpleIcuAnalysisTests { AnalysisService analysisService = injector.getInstance(AnalysisService.class); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("icu_tokenizer"); + MatcherAssert.assertThat(tokenizerFactory, instanceOf(IcuTokenizerFactory.class)); + TokenFilterFactory filterFactory = analysisService.tokenFilter("icu_normalizer"); MatcherAssert.assertThat(filterFactory, instanceOf(IcuNormalizerTokenFilterFactory.class)); + + filterFactory = analysisService.tokenFilter("icu_folding"); + MatcherAssert.assertThat(filterFactory, instanceOf(IcuFoldingTokenFilterFactory.class)); + + filterFactory = analysisService.tokenFilter("icu_collation"); + MatcherAssert.assertThat(filterFactory, instanceOf(IcuCollationTokenFilterFactory.class)); + + filterFactory = analysisService.tokenFilter("icu_transform"); + MatcherAssert.assertThat(filterFactory, instanceOf(IcuTransformTokenFilterFactory.class)); } }