Add user dictionary test case

Closes #59
This commit is contained in:
Jun Ohtani 2015-05-14 13:41:44 +09:00
parent a3dce667b1
commit 330911389a
6 changed files with 32 additions and 3 deletions

View File

@ -88,7 +88,7 @@ The following are settings that can be set for a `kuromoji_tokenizer` tokenizer
|:--------------------|:--------------------------------------------------------------------------------------------------------------------------|:------------------|
| mode | Tokenization mode: determines how the tokenizer handles compound and unknown words. One of `normal`, `search`, or `extended`. | `search` |
| discard_punctuation | `true` if punctuation tokens should be dropped from the output. | `true` |
| user_dict | set User Dictionary file | |
| user_dictionary | Path to the user dictionary file. | |
### Tokenization mode

View File

@ -87,6 +87,7 @@
<directory>src/test/java</directory>
<includes>
<include>**/*.json</include>
<include>**/*.txt</include>
</includes>
</testResource>
<testResource>

View File

@ -245,4 +245,24 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase {
return buffer.toString();
}
/**
 * Checks that the {@code kuromoji_user_dict} tokenizer, which is configured with a
 * user dictionary file, emits the dictionary entry {@code 制限スピード} as a single term.
 *
 * <p>The expected terms are the non-empty segments of {@code source} in order. The
 * trailing {@code 。} is not expected in the output (presumably dropped because
 * punctuation is discarded by default -- confirm against the tokenizer settings).
 *
 * @throws IOException if reading from the tokenizer fails
 */
@Test
public void testKuromojiUserDict() throws IOException {
    AnalysisService analysisService = createAnalysisService();
    TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_user_dict");
    String source = "私は制限スピードを超える。";
    // A tokenizer never emits empty terms: the single-character segments around the
    // user-dictionary entry must be spelled out so the expectation mirrors the source.
    String[] expected = new String[]{"私", "は", "制限スピード", "を", "超える"};
    Tokenizer tokenizer = tokenizerFactory.create();
    tokenizer.setReader(new StringReader(source));
    assertSimpleTSOutput(tokenizer, expected);
}
/**
 * Regression test for issue #59: looking up the {@code kuromoji_empty_user_dict}
 * tokenizer, whose configured user dictionary file is expected to be empty, must
 * still yield a {@link KuromojiTokenizerFactory}.
 */
@Test
public void testKuromojiEmptyUserDict() {
    TokenizerFactory factory = createAnalysisService().tokenizer("kuromoji_empty_user_dict");
    assertThat(factory, instanceOf(KuromojiTokenizerFactory.class));
}
}

View File

@ -39,9 +39,16 @@
"tokenizer" : {
"kuromoji" : {
"type":"kuromoji_tokenizer"
"type":"kuromoji_tokenizer"
},
"kuromoji_empty_user_dict" : {
"type":"kuromoji_tokenizer",
"user_dictionary":"org/elasticsearch/index/analysis/empty_user_dict.txt"
},
"kuromoji_user_dict" : {
"type":"kuromoji_tokenizer",
"user_dictionary":"org/elasticsearch/index/analysis/user_dict.txt"
}
},
"analyzer" : {
"my_analyzer" : {

View File

@ -0,0 +1 @@
制限スピード,制限スピード,セイゲンスピード,テスト名詞