diff --git a/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java
new file mode 100644
index 00000000000..736ba204046
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import java.io.Reader;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Tokenizer factory whose {@link #create(Reader)} wraps the given reader in a
+ * Lucene {@link ICUTokenizer}.
+ *
+ * @author joerg
+ */
+public class IcuTokenizerFactory extends AbstractTokenizerFactory {
+
+    /**
+     * Forwards all four arguments, unchanged and in the same order, to the
+     * {@link AbstractTokenizerFactory} constructor. No setting is read here.
+     *
+     * @param index         handed to the superclass constructor
+     * @param indexSettings handed to the superclass constructor
+     * @param name          handed to the superclass constructor
+     * @param settings      handed to the superclass constructor
+     */
+    @Inject
+    public IcuTokenizerFactory(
+            Index index,
+            @IndexSettings Settings indexSettings,
+            @Assisted String name,
+            @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    /**
+     * Builds a new {@link ICUTokenizer} around the supplied reader.
+     *
+     * @param reader reader passed to the {@link ICUTokenizer} constructor
+     * @return a newly constructed {@link ICUTokenizer}
+     */
+    @Override
+    public Tokenizer create(Reader reader) {
+        final Tokenizer icuTokenizer = new ICUTokenizer(reader);
+        return icuTokenizer;
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/analysis/IcuTransformTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/IcuTransformTokenFilterFactory.java
new file mode 100644
index 00000000000..1f34d02b3bd
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/IcuTransformTokenFilterFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ * See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.icu.ICUTransformFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import com.ibm.icu.text.Transliterator;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+
+/**
+ * Token filter factory that wraps token streams in Lucene's {@link ICUTransformFilter},
+ * configured with an ICU {@link Transliterator} built from this filter's settings.
+ *
+ * <p>Two settings are read in the constructor:
+ * <ul>
+ *   <li>{@code id} - the ID passed to {@link Transliterator#getInstance(String, int)};
+ *       defaults to {@code "Null"}</li>
+ *   <li>{@code dir} - the exact value {@code "forward"} (also the default) selects
+ *       {@link Transliterator#FORWARD}; every other value selects
+ *       {@link Transliterator#REVERSE}</li>
+ * </ul>
+ *
+ * @author joergprante
+ */
+public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory {
+
+    private final String id;
+    private final int dir;
+    private final Transliterator transliterator;
+
+    /**
+     * Forwards the four arguments to the superclass constructor, then reads the {@code id}
+     * and {@code dir} settings and builds the transliterator once. That single instance is
+     * handed to every filter returned by {@link #create(TokenStream)}.
+     *
+     * @param index         handed to the superclass constructor
+     * @param indexSettings handed to the superclass constructor
+     * @param name          handed to the superclass constructor
+     * @param settings      handed to the superclass constructor; also the source of the
+     *                      {@code id} and {@code dir} values
+     */
+    @Inject
+    public IcuTransformTokenFilterFactory(
+            Index index,
+            @IndexSettings Settings indexSettings,
+            @Assisted String name,
+            @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        this.id = settings.get("id", "Null");
+
+        final String direction = settings.get("dir", "forward");
+        if ("forward".equals(direction)) {
+            this.dir = Transliterator.FORWARD;
+        } else {
+            // Anything other than the exact string "forward" is treated as reverse.
+            this.dir = Transliterator.REVERSE;
+        }
+
+        this.transliterator = Transliterator.getInstance(this.id, this.dir);
+    }
+
+    /**
+     * Wraps the given stream in a new {@link ICUTransformFilter} that uses this factory's
+     * transliterator.
+     *
+     * @param tokenStream stream passed to the {@link ICUTransformFilter} constructor
+     * @return a newly constructed {@link ICUTransformFilter}
+     */
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        final Transliterator shared = this.transliterator;
+        return new ICUTransformFilter(tokenStream, shared);
+    }
+}
\ No newline at end of file