From d336faa0b092dc2ff331982f19800a2771250403 Mon Sep 17 00:00:00 2001 From: James Rodewig Date: Mon, 2 Mar 2020 07:47:38 -0500 Subject: [PATCH] [DOCS] Reformat trim token filter docs (#51649) Makes the following changes to the `trim` token filter docs: * Updates description * Adds a link to the related Lucene filter * Adds tip about removing whitespace using tokenizers * Adds detailed analyze snippets * Adds custom analyzer snippet --- .../tokenfilters/trim-tokenfilter.asciidoc | 105 +++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc index 1373811b0cb..19d47f203af 100644 --- a/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc @@ -4,4 +4,107 @@ Trim ++++ -The `trim` token filter trims the whitespace surrounding a token. +Removes leading and trailing whitespace from each token in a stream. + +The `trim` filter uses Lucene's +https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html[TrimFilter]. + +[TIP] +==== +Many commonly used tokenizers, such as the +<<analysis-standard-tokenizer,`standard`>> or +<<analysis-whitespace-tokenizer,`whitespace`>> tokenizer, remove whitespace by +default. When using these tokenizers, you don't need to add a separate `trim` +filter. +==== + +[[analysis-trim-tokenfilter-analyze-ex]] +==== Example + +To see how the `trim` filter works, you first need to produce a token +containing whitespace. + +The following <<indices-analyze,analyze API>> request uses the +<<analysis-keyword-tokenizer,`keyword`>> tokenizer to produce a token for +`" fox "`. + +[source,console] +---- +GET _analyze +{ + "tokenizer" : "keyword", + "text" : " fox " +} +---- + +The API returns the following response. Note the `" fox "` token contains +the original text's whitespace. 
+ +[source,console-result] +---- +{ + "tokens": [ + { + "token": " fox ", + "start_offset": 0, + "end_offset": 5, + "type": "word", + "position": 0 + } + ] +} +---- + +To remove the whitespace, add the `trim` filter to the previous analyze API +request. + +[source,console] +---- +GET _analyze +{ + "tokenizer" : "keyword", + "filter" : ["trim"], + "text" : " fox " +} +---- + +The API returns the following response. The returned `fox` token does not +include any leading or trailing whitespace. + +[source,console-result] +---- +{ + "tokens": [ + { + "token": "fox", + "start_offset": 0, + "end_offset": 5, + "type": "word", + "position": 0 + } + ] +} +---- + +[[analysis-trim-tokenfilter-analyzer-ex]] +==== Add to an analyzer + +The following <<indices-create-index,create index API>> request uses the `trim` +filter to configure a new <<analysis-custom-analyzer,custom analyzer>>. + +[source,console] +---- +PUT trim_example +{ + "settings": { + "analysis": { + "analyzer": { + "keyword_trim": { + "tokenizer": "keyword", + "filter": [ "trim" ] + } + } + } + } +} +---- \ No newline at end of file