Kuromoji analyzer is not registered as a global analyzer
The `kuromoji` analyzer is not registered as a global analyzer, so this request, which does not target an index, fails:
`POST /_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか`
The response is:
`{ "error": "ElasticsearchIllegalArgumentException[failed to find analyzer [kuromoji]]", "status": 400 }`
But the same analysis succeeds when an index is created first and the request is sent to that index:
`PUT /t`
`POST /t/_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか`
Closes #31.
This commit is contained in:
parent
c258213062
commit
a8ad051435
|
@ -21,9 +21,9 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
|
import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
|
@ -48,7 +48,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, tokenStream, stopTags);
|
return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, stopTags);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,9 +23,9 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.ja.*;
|
import org.apache.lucene.analysis.ja.*;
|
||||||
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.elasticsearch.common.component.AbstractComponent;
|
import org.elasticsearch.common.component.AbstractComponent;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.analysis.*;
|
import org.elasticsearch.index.analysis.*;
|
||||||
|
|
||||||
|
@ -42,6 +42,10 @@ public class KuromojiIndicesAnalysis extends AbstractComponent {
|
||||||
IndicesAnalysisService indicesAnalysisService) {
|
IndicesAnalysisService indicesAnalysisService) {
|
||||||
super(settings);
|
super(settings);
|
||||||
|
|
||||||
|
indicesAnalysisService.analyzerProviderFactories().put("kuromoji",
|
||||||
|
new PreBuiltAnalyzerProviderFactory("kuromoji", AnalyzerScope.INDICES,
|
||||||
|
new JapaneseAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||||
|
|
||||||
indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark",
|
indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark",
|
||||||
new KurumojiCharFilterFactoryFactory(new CharFilterFactory() {
|
new KurumojiCharFilterFactoryFactory(new CharFilterFactory() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -94,7 +98,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48,
|
return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION,
|
||||||
tokenStream, JapaneseAnalyzer
|
tokenStream, JapaneseAnalyzer
|
||||||
.getDefaultStopTags());
|
.getDefaultStopTags());
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
||||||
|
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
|
||||||
|
import static org.hamcrest.CoreMatchers.is;
|
||||||
|
import static org.hamcrest.CoreMatchers.notNullValue;
|
||||||
|
|
||||||
|
@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE)
|
||||||
|
public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKuromojiAnalyzer() throws ExecutionException, InterruptedException {
|
||||||
|
AnalyzeResponse response = client().admin().indices()
|
||||||
|
.prepareAnalyze("JR新宿駅の近くにビールを飲みに行こうか").setAnalyzer("kuromoji")
|
||||||
|
.execute().get();
|
||||||
|
|
||||||
|
String[] expectedTokens = {"jr", "新宿", "駅", "近く", "ビール", "飲む", "行く"};
|
||||||
|
|
||||||
|
assertThat(response, notNullValue());
|
||||||
|
assertThat(response.getTokens().size(), is(7));
|
||||||
|
|
||||||
|
for (int i = 0; i < expectedTokens.length; i++) {
|
||||||
|
assertThat(response.getTokens().get(i).getTerm(), is(expectedTokens[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue