From d0f5bc340367a06dba32e250816f06c01929f564 Mon Sep 17 00:00:00 2001 From: kimchy Date: Tue, 24 Aug 2010 23:11:22 +0300 Subject: [PATCH] add russian letter tokenizer --- .../analysis/AbstractTokenizerFactory.java | 2 +- .../index/analysis/AnalysisModule.java | 4 +- .../RussianLetterTokenizerFactory.java | 44 +++++++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianLetterTokenizerFactory.java diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java index 68dfd8feb67..bfd2008379c 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java @@ -25,7 +25,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; /** - * @author kimchy (Shay Banon) + * @author kimchy (shay.banon) */ public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java index c165732373d..2a7810962a5 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java @@ -305,6 +305,8 @@ public class AnalysisModule extends AbstractModule { tokenizersBindings.processTokenizer("letter", LetterTokenizerFactory.class); tokenizersBindings.processTokenizer("lowercase", LowerCaseTokenizerFactory.class); tokenizersBindings.processTokenizer("whitespace", WhitespaceTokenizerFactory.class); + 
tokenizersBindings.processTokenizer("russian_letter", RussianLetterTokenizerFactory.class); + tokenizersBindings.processTokenizer("russianLetter", RussianLetterTokenizerFactory.class); } @Override public void processAnalyzers(AnalyzersBindings analyzersBindings) { @@ -347,7 +349,7 @@ public class AnalysisModule extends AbstractModule { analyzersBindings.processAnalyzer("arabic", ArabicAnalyzerProvider.class); analyzersBindings.processAnalyzer("brazilian", BrazilianAnalyzerProvider.class); analyzersBindings.processAnalyzer("chinese", ChineseAnalyzerProvider.class); - analyzersBindings.processAnalyzer("cjk", ChineseAnalyzerProvider.class); + analyzersBindings.processAnalyzer("cjk", CjkAnalyzerProvider.class); analyzersBindings.processAnalyzer("czech", CzechAnalyzerProvider.class); analyzersBindings.processAnalyzer("dutch", DutchAnalyzerProvider.class); analyzersBindings.processAnalyzer("french", FrenchAnalyzerProvider.class); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianLetterTokenizerFactory.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianLetterTokenizerFactory.java new file mode 100644 index 00000000000..ecb5058394d --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianLetterTokenizerFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.ru.RussianLetterTokenizer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +import java.io.Reader; + +/** Tokenizer factory whose {@link #create(Reader)} wraps the supplied reader in a Lucene {@link RussianLetterTokenizer}. + * @author kimchy (shay.banon) + */ +public class RussianLetterTokenizerFactory extends AbstractTokenizerFactory { + + /* Injected constructor: forwards index, indexSettings and name to the superclass. */ @Inject public RussianLetterTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name); /* the assisted 'settings' argument is accepted but not read in this constructor */ + } + + /* Returns a newly constructed RussianLetterTokenizer over 'reader' on every call. */ @Override public Tokenizer create(Reader reader) { + return new RussianLetterTokenizer(reader); + } +}