From f27a7f4c1a63d068fdd075270d714a24440ee510 Mon Sep 17 00:00:00 2001
From: Nik Everett
Date: Wed, 17 Aug 2016 09:56:00 -0400
Subject: [PATCH] Test response snippets in kuromoji docs

Relates to #18160
---
 docs/plugins/analysis-kuromoji.asciidoc | 44 ++++++++++++++----------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc
index 454ba3d2de2..cba44a4d645 100644
--- a/docs/plugins/analysis-kuromoji.asciidoc
+++ b/docs/plugins/analysis-kuromoji.asciidoc
@@ -189,22 +189,23 @@ The above `analyze` request returns the following:
     "start_offset" : 0,
     "end_offset" : 2,
     "type" : "word",
-    "position" : 1
+    "position" : 0
   }, {
     "token" : "スカイツリー",
     "start_offset" : 2,
     "end_offset" : 8,
     "type" : "word",
-    "position" : 2
+    "position" : 1
   } ]
 }
 --------------------------------------------------
+// TESTRESPONSE
 
 [[analysis-kuromoji-baseform]]
 ==== `kuromoji_baseform` token filter
 
 The `kuromoji_baseform` token filter replaces terms with their
-BaseFormAttribute. This acts as a lemmatizer for verbs and adjectives.
+BaseFormAttribute. This acts as a lemmatizer for verbs and adjectives. Example:
 
 [source,js]
 --------------------------------------------------
@@ -230,19 +231,21 @@ POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=飲み
 --------------------------------------------------
 // CONSOLE
 
-[source,text]
+Which responds with:
+
+[source,js]
 --------------------------------------------------
-# Result
 {
   "tokens" : [ {
     "token" : "飲む",
     "start_offset" : 0,
     "end_offset" : 2,
     "type" : "word",
-    "position" : 1
+    "position" : 0
   } ]
 }
 --------------------------------------------------
+// TESTRESPONSE
 
 [[analysis-kuromoji-speech]]
 ==== `kuromoji_part_of_speech` token filter
@@ -255,6 +258,8 @@ part-of-speech tags. It accepts the following setting:
 An array of part-of-speech tags that should be removed. It defaults to the
 `stoptags.txt` file embedded in the `lucene-analyzer-kuromoji.jar`.
 
+For example:
+
 [source,js]
 --------------------------------------------------
 PUT kuromoji_sample
@@ -285,29 +290,30 @@ PUT kuromoji_sample
 }
 
 POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=寿司がおいしいね
-
 --------------------------------------------------
 // CONSOLE
 
-[source,text]
+Which responds with:
+
+[source,js]
 --------------------------------------------------
-# Result
 {
   "tokens" : [ {
     "token" : "寿司",
     "start_offset" : 0,
     "end_offset" : 2,
     "type" : "word",
-    "position" : 1
+    "position" : 0
   }, {
     "token" : "おいしい",
     "start_offset" : 3,
     "end_offset" : 7,
     "type" : "word",
-    "position" : 3
+    "position" : 2
   } ]
 }
 --------------------------------------------------
+// TESTRESPONSE
 
 [[analysis-kuromoji-readingform]]
 ==== `kuromoji_readingform` token filter
@@ -359,7 +365,6 @@ PUT kuromoji_sample
 
 POST kuromoji_sample/_analyze?analyzer=katakana_analyzer&text=寿司 <1>
 POST kuromoji_sample/_analyze?analyzer=romaji_analyzer&text=寿司 <2>
-
 --------------------------------------------------
 // CONSOLE
 
@@ -410,7 +415,6 @@ PUT kuromoji_sample
 
 POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=コピー <1>
 POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=サーバー <2>
-
 --------------------------------------------------
 // CONSOLE
 
@@ -462,9 +466,8 @@ POST kuromoji_sample/_analyze?analyzer=analyzer_with_ja_stop&text=ストップ
 The above request returns:
 
-[source,text]
+[source,js]
 --------------------------------------------------
-# Result
 {
   "tokens" : [ {
     "token" : "消える",
     "start_offset" : 18,
@@ -480,7 +483,7 @@ The above request returns:
 ===== `kuromoji_number` token filter
 
 The `kuromoji_number` token filter normalizes Japanese numbers (kansūji)
-to regular Arabic decimal numbers in half-width characters.
+to regular Arabic decimal numbers in half-width characters. For example:
 
 [source,js]
 --------------------------------------------------
@@ -503,20 +506,21 @@ PUT kuromoji_sample
 }
 
 POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=一〇〇〇
-
 --------------------------------------------------
 // CONSOLE
 
-[source,text]
+Which results in:
+
+[source,js]
 --------------------------------------------------
-# Result
 {
   "tokens" : [ {
     "token" : "1000",
     "start_offset" : 0,
     "end_offset" : 4,
     "type" : "word",
-    "position" : 1
+    "position" : 0
   } ]
 }
 --------------------------------------------------
+// TESTRESPONSE