diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index e3a58360e9b..bea12470cb0 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -38,6 +39,8 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor List wordList = Analysis.getWordList(env, settings, "stoptags"); if (wordList != null) { stopTags.addAll(wordList); + } else { + stopTags.addAll(JapaneseAnalyzer.getDefaultStopTags()); } } diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index c0271c99784..1be8a22fb2a 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -93,6 +93,21 @@ public class KuromojiAnalysisTests extends ESTestCase { assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); } + public void testPartOfSpeechFilter() throws IOException { + TestAnalysis analysis = createTestAnalysis(); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_part_of_speech"); + + assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class)); + + String source = "寿司がおいしいね"; + String[] expected_tokens = new String[]{"寿司", "おいしい"}; + + Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); + + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens); + } + public void testReadingFormFilterFactory() throws IOException { TestAnalysis analysis = createTestAnalysis(); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_rf"); @@ -208,7 +223,7 @@ public class KuromojiAnalysisTests extends ESTestCase { int i = 0; while (stream.incrementToken()) { assertThat(expected.length, greaterThan(i)); - assertThat( "expected different term at index " + i, expected[i++], equalTo(termAttr.toString())); + assertThat("expected different term at index " + i, termAttr.toString(), equalTo(expected[i++])); } assertThat("not all tokens produced", i, equalTo(expected.length)); }