Not registered as a global analyzer

The `kuromoji` analyzer is not registered as a global analyzer, so this fails:

    POST /_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか

    {
       "error": "ElasticsearchIllegalArgumentException[failed to find analyzer [kuromoji]]",
       "status": 400
    }

But the same request succeeds when it targets an existing index:

    PUT /t
    POST /t/_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか

Closes #31.
This commit is contained in:
Jun Ohtani 2014-04-28 12:56:58 +09:00 committed by David Pilato
parent c258213062
commit a8ad051435
3 changed files with 56 additions and 4 deletions

View File

@ -21,9 +21,9 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
@ -48,7 +48,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, tokenStream, stopTags); return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, stopTags);
} }
} }

View File

@ -23,9 +23,9 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ja.*; import org.apache.lucene.analysis.ja.*;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.*; import org.elasticsearch.index.analysis.*;
@ -42,6 +42,10 @@ public class KuromojiIndicesAnalysis extends AbstractComponent {
IndicesAnalysisService indicesAnalysisService) { IndicesAnalysisService indicesAnalysisService) {
super(settings); super(settings);
indicesAnalysisService.analyzerProviderFactories().put("kuromoji",
new PreBuiltAnalyzerProviderFactory("kuromoji", AnalyzerScope.INDICES,
new JapaneseAnalyzer(Lucene.ANALYZER_VERSION)));
indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark",
new KurumojiCharFilterFactoryFactory(new CharFilterFactory() { new KurumojiCharFilterFactoryFactory(new CharFilterFactory() {
@Override @Override
@ -94,7 +98,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent {
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION,
tokenStream, JapaneseAnalyzer tokenStream, JapaneseAnalyzer
.getDefaultStopTags()); .getDefaultStopTags());
} }

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
import java.util.concurrent.ExecutionException;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.notNullValue;
/**
 * Integration test for the {@code kuromoji} analyzer when it is requested through the
 * analyze API without naming an index.
 */
@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE)
public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest {

    /**
     * Sends a Japanese sentence to the analyze API with the {@code kuromoji} analyzer and
     * checks the returned terms, in order, against the expected list.
     *
     * @throws ExecutionException   if the analyze request completes with a failure
     * @throws InterruptedException if the thread is interrupted while waiting for the response
     */
    @Test
    public void testKuromojiAnalyzer() throws ExecutionException, InterruptedException {
        AnalyzeResponse response = client().admin().indices()
                .prepareAnalyze("JR新宿駅の近くにビールを飲みに行こうか").setAnalyzer("kuromoji")
                .execute().get();

        // The term expected between "新宿" and "近く" is "駅" (from "新宿駅"); an empty
        // string is never a valid expected term here.
        String[] expectedTokens = {"jr", "新宿", "駅", "近く", "ビール", "飲む", "行く"};

        assertThat(response, notNullValue());
        // Compare against the array length so the count and the term list cannot drift apart.
        assertThat(response.getTokens().size(), is(expectedTokens.length));
        for (int i = 0; i < expectedTokens.length; i++) {
            assertThat(response.getTokens().get(i).getTerm(), is(expectedTokens[i]));
        }
    }
}