From a8ad05143548fd3051d40d1b62325fcf7ac5b6bf Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Mon, 28 Apr 2014 12:56:58 +0900 Subject: [PATCH] Not registered as a global analyzer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `kuromoji` analyzer is not registered as a global analyzer, so this fails: POST /_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか { "error": "ElasticsearchIllegalArgumentException[failed to find analyzer [kuromoji]]", "status": 400 } But this succeeds: PUT /t POST /t/_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか Closes #31. --- .../KuromojiPartOfSpeechFilterFactory.java | 4 +- .../analysis/KuromojiIndicesAnalysis.java | 8 +++- .../analysis/KuromojiIntegrationTests.java | 48 +++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index 3c4cbaeea5f..020cd93b6a5 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -21,9 +21,9 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; -import org.apache.lucene.util.Version; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; @@ -48,7 +48,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor @Override public TokenStream create(TokenStream 
tokenStream) { - return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, tokenStream, stopTags); + return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, stopTags); } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 8a15426cd3a..04c34a5f081 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -23,9 +23,9 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.*; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; -import org.apache.lucene.util.Version; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.*; @@ -42,6 +42,10 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { IndicesAnalysisService indicesAnalysisService) { super(settings); + indicesAnalysisService.analyzerProviderFactories().put("kuromoji", + new PreBuiltAnalyzerProviderFactory("kuromoji", AnalyzerScope.INDICES, + new JapaneseAnalyzer(Lucene.ANALYZER_VERSION))); + indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", new KurumojiCharFilterFactoryFactory(new CharFilterFactory() { @Override @@ -94,7 +98,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, + return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java 
b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java new file mode 100644 index 00000000000..5d909959db3 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Test;
+
+import java.util.concurrent.ExecutionException;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.notNullValue;
+/** Checks that the {@code kuromoji} analyzer resolves when the analyze request names no index. */
+@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE)
+public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest {
+    /** Analyzes one Japanese sentence with {@code kuromoji} and compares every returned term, in order. */
+    @Test
+    public void testKuromojiAnalyzer() throws ExecutionException, InterruptedException {
+        AnalyzeResponse response = client().admin().indices()
+                .prepareAnalyze("JR新宿駅の近くにビールを飲みに行こうか").setAnalyzer("kuromoji")
+                .execute().get();
+
+        String[] expectedTokens = {"jr", "新宿", "駅", "近く", "ビール", "飲む", "行く"};
+        // Derive the expected count from the array so the size check and the loop below cannot drift apart.
+        assertThat(response, notNullValue());
+        assertThat(response.getTokens().size(), is(expectedTokens.length));
+
+        for (int i = 0; i < expectedTokens.length; i++) {
+            assertThat(response.getTokens().get(i).getTerm(), is(expectedTokens[i]));
+        }
+    }
+}