parent
a3dce667b1
commit
330911389a
|
@ -88,7 +88,7 @@ The following are settings that can be set for a `kuromoji_tokenizer` tokenizer
|
|||
|:--------------------|:--------------------------------------------------------------------------------------------------------------------------|:------------------|
|
||||
| mode | Tokenization mode: this determines how the tokenizer handles compound and unknown words. Accepted values: `normal`, `search`, `extended`. | `search` |
|
||||
| discard_punctuation | `true` if punctuation tokens should be dropped from the output. | `true` |
|
||||
| user_dict | set User Dictionary file | |
|
||||
| user_dictionary | Sets the user dictionary file. | |
|
||||
|
||||
### Tokenization mode
|
||||
|
||||
|
|
1
pom.xml
1
pom.xml
|
@ -87,6 +87,7 @@
|
|||
<directory>src/test/java</directory>
|
||||
<includes>
|
||||
<include>**/*.json</include>
|
||||
<include>**/*.txt</include>
|
||||
</includes>
|
||||
</testResource>
|
||||
<testResource>
|
||||
|
|
|
@ -245,4 +245,24 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase {
|
|||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
 * Tokenizes a Japanese sentence with the tokenizer registered under the name
 * "kuromoji_user_dict" and checks the emitted terms with assertSimpleTSOutput.
 *
 * The expected output keeps "制限スピード" as one single term.
 * NOTE(review): presumably this is the effect of the user dictionary configured
 * for this tokenizer in the test settings -- confirm against the settings file
 * and the dictionary file it points to.
 *
 * @throws IOException declared by the method; which call raises it is not
 *                     visible from this excerpt
 */
@Test
|
||||
public void testKuromojiUserDict() throws IOException {
|
||||
AnalysisService analysisService = createAnalysisService();
|
||||
// Look the tokenizer factory up by the name used in the test analysis settings.
TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_user_dict");
|
||||
String source = "私は制限スピードを超える。";
|
||||
// The trailing "。" of the source has no counterpart in the expected terms.
// NOTE(review): presumably dropped because discard_punctuation defaults to true -- confirm.
String[] expected = new String[]{"私", "は", "制限スピード", "を", "超える"};
|
||||
|
||||
Tokenizer tokenizer = tokenizerFactory.create();
|
||||
// Feed the sentence to the tokenizer before its output is checked.
tokenizer.setReader(new StringReader(source));
|
||||
assertSimpleTSOutput(tokenizer, expected);
|
||||
}
|
||||
|
||||
// fix #59
// Regression test tied to issue #59: looks up the tokenizer registered under the
// name "kuromoji_empty_user_dict" and asserts that the resolved factory is a
// KuromojiTokenizerFactory. No tokenizer is created and no text is analyzed here.
// NOTE(review): presumably the point is that an empty user dictionary file must
// not break factory construction -- confirm against issue #59.
|
||||
@Test
|
||||
public void testKuromojiEmptyUserDict() {
|
||||
AnalysisService analysisService = createAnalysisService();
|
||||
TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_empty_user_dict");
|
||||
// Only the factory type is checked.
assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -39,9 +39,16 @@
|
|||
|
||||
"tokenizer" : {
|
||||
"kuromoji" : {
|
||||
"type":"kuromoji_tokenizer"
|
||||
"type":"kuromoji_tokenizer"
|
||||
},
|
||||
"kuromoji_empty_user_dict" : {
|
||||
"type":"kuromoji_tokenizer",
|
||||
"user_dictionary":"org/elasticsearch/index/analysis/empty_user_dict.txt"
|
||||
},
|
||||
"kuromoji_user_dict" : {
|
||||
"type":"kuromoji_tokenizer",
|
||||
"user_dictionary":"org/elasticsearch/index/analysis/user_dict.txt"
|
||||
}
|
||||
|
||||
},
|
||||
"analyzer" : {
|
||||
"my_analyzer" : {
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
制限スピード,制限スピード,セイゲンスピード,テスト名詞
|
Loading…
Reference in New Issue