Deprecate smartcn_word
Looks like `WordTokenFilter` has been [deprecated in Lucene 4.8](http://lucene.apache.org/core/4_8_0/analyzers-smartcn/org/apache/lucene/analysis/cn/smart/WordTokenFilter.html), and the javadoc indicates that only the [HMMChineseTokenizer](http://lucene.apache.org/core/4_8_0/analyzers-smartcn/org/apache/lucene/analysis/cn/smart/HMMChineseTokenizer.html) will be supported. We need to deprecate `smartcn_word` and `smartcn_sentence`. We add `smartcn_tokenizer`, which does both things. Closes #22. (cherry picked from commit 64dcb9b)
This commit is contained in:
parent
45dfe9abb6
commit
d063fe6019
|
@ -20,7 +20,9 @@ Please read documentation relative to the version you are using:
|
||||||
|
|
||||||
* [3.0.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-smartcn/blob/master/README.md)
|
* [3.0.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-smartcn/blob/master/README.md)
|
||||||
|
|
||||||
The plugin includes the `smartcn` analyzer, `smartcn_sentence` tokenizer, and `smartcn_word` token filter.
|
The plugin includes the `smartcn` analyzer and `smartcn_tokenizer` tokenizer.
|
||||||
|
|
||||||
|
Note that the `smartcn_word` token filter and the `smartcn_sentence` tokenizer have been deprecated.
|
||||||
|
|
||||||
License
|
License
|
||||||
-------
|
-------
|
||||||
|
|
|
@ -30,11 +30,14 @@ public class SmartChineseAnalysisBinderProcessor extends AnalysisModule.Analysis
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void processTokenizers(TokenizersBindings tokenizersBindings) {
|
public void processTokenizers(TokenizersBindings tokenizersBindings) {
|
||||||
|
// TODO Remove it in 2.3.0 (was deprecated: see https://github.com/elasticsearch/elasticsearch-analysis-smartcn/issues/22)
|
||||||
tokenizersBindings.processTokenizer("smartcn_sentence", SmartChineseSentenceTokenizerFactory.class);
|
tokenizersBindings.processTokenizer("smartcn_sentence", SmartChineseSentenceTokenizerFactory.class);
|
||||||
|
tokenizersBindings.processTokenizer("smartcn_tokenizer", SmartChineseTokenizerTokenizerFactory.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
|
public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
|
||||||
|
// TODO Remove it in 2.3.0 (was deprecated: see https://github.com/elasticsearch/elasticsearch-analysis-smartcn/issues/22)
|
||||||
tokenFiltersBindings.processTokenFilter("smartcn_word", SmartChineseWordTokenFilterFactory.class);
|
tokenFiltersBindings.processTokenFilter("smartcn_word", SmartChineseWordTokenFilterFactory.class);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,9 @@ import org.elasticsearch.index.settings.IndexSettings;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* SentenceTokenizer has been deprecated in Lucene 4.8
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public class SmartChineseSentenceTokenizerFactory extends AbstractTokenizerFactory {
|
public class SmartChineseSentenceTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.cn.smart.HMMChineseTokenizer;
|
||||||
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
public class SmartChineseTokenizerTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SmartChineseTokenizerTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name, settings);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tokenizer create(Reader reader) {
|
||||||
|
return new HMMChineseTokenizer(reader);
|
||||||
|
}
|
||||||
|
}
|
|
@ -28,7 +28,9 @@ import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* WordTokenFilter has been deprecated in Lucene 4.8
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public class SmartChineseWordTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class SmartChineseWordTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.indices.analysis.smartcn;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.cn.smart.HMMChineseTokenizer;
|
||||||
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
|
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
|
||||||
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
|
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
|
||||||
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
|
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
|
||||||
|
@ -47,6 +48,7 @@ public class SmartChineseIndicesAnalysis extends AbstractComponent {
|
||||||
indicesAnalysisService.analyzerProviderFactories().put("smartcn", new PreBuiltAnalyzerProviderFactory("smartcn", AnalyzerScope.INDICES, new SmartChineseAnalyzer(Lucene.ANALYZER_VERSION)));
|
indicesAnalysisService.analyzerProviderFactories().put("smartcn", new PreBuiltAnalyzerProviderFactory("smartcn", AnalyzerScope.INDICES, new SmartChineseAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||||
|
|
||||||
// Register smartcn_word token filter
|
// Register smartcn_word token filter
|
||||||
|
// TODO Remove it in 2.3.0 (was deprecated: see https://github.com/elasticsearch/elasticsearch-analysis-smartcn/issues/22)
|
||||||
indicesAnalysisService.tokenFilterFactories().put("smartcn_word", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
indicesAnalysisService.tokenFilterFactories().put("smartcn_word", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||||
@Override public String name() {
|
@Override public String name() {
|
||||||
return "smartcn_word";
|
return "smartcn_word";
|
||||||
|
@ -70,6 +72,18 @@ public class SmartChineseIndicesAnalysis extends AbstractComponent {
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Register smartcn_tokenizer tokenizer
|
||||||
|
indicesAnalysisService.tokenizerFactories().put("smartcn_tokenizer", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
|
||||||
|
@Override
|
||||||
|
public String name() {
|
||||||
|
return "smartcn_tokenizer";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tokenizer create(Reader reader) {
|
||||||
|
return new HMMChineseTokenizer(reader);
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,10 +53,7 @@ public class SimpleSmartChineseAnalysisTests extends ElasticsearchTestCase {
|
||||||
|
|
||||||
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
|
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
|
||||||
|
|
||||||
TokenizerFactory tokenizerFactory = analysisService.tokenizer("smartcn_sentence");
|
TokenizerFactory tokenizerFactory = analysisService.tokenizer("smartcn_tokenizer");
|
||||||
MatcherAssert.assertThat(tokenizerFactory, instanceOf(SmartChineseSentenceTokenizerFactory.class));
|
MatcherAssert.assertThat(tokenizerFactory, instanceOf(SmartChineseTokenizerTokenizerFactory.class));
|
||||||
|
|
||||||
TokenFilterFactory filterFactory = analysisService.tokenFilter("smartcn_word");
|
|
||||||
MatcherAssert.assertThat(filterFactory, instanceOf(SmartChineseWordTokenFilterFactory.class));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,20 +54,10 @@ public class SimpleSmartChineseIntegrationTests extends ElasticsearchIntegration
|
||||||
@Test
|
@Test
|
||||||
public void testSmartcnTokenizer() throws ExecutionException, InterruptedException {
|
public void testSmartcnTokenizer() throws ExecutionException, InterruptedException {
|
||||||
AnalyzeResponse response = client().admin().indices()
|
AnalyzeResponse response = client().admin().indices()
|
||||||
.prepareAnalyze("叻出色").setTokenizer("smartcn_sentence")
|
.prepareAnalyze("叻出色").setTokenizer("smartcn_tokenizer")
|
||||||
.execute().get();
|
.execute().get();
|
||||||
|
|
||||||
assertThat(response, notNullValue());
|
assertThat(response, notNullValue());
|
||||||
assertThat(response.getTokens().size(), is(1));
|
assertThat(response.getTokens().size(), is(2));
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSmartcnTokenFilter() throws ExecutionException, InterruptedException {
|
|
||||||
AnalyzeResponse response = client().admin().indices()
|
|
||||||
.prepareAnalyze("叻出色").setTokenFilters("smartcn_word")
|
|
||||||
.execute().get();
|
|
||||||
|
|
||||||
assertThat(response, notNullValue());
|
|
||||||
assertThat(response.getTokens().size(), is(3));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue