From e6a469cc7410c2ba13a59753198bca6c5d848ff7 Mon Sep 17 00:00:00 2001 From: James Rodewig Date: Fri, 3 Jan 2020 08:34:11 -0500 Subject: [PATCH] [DOCS] Reformat uppercase token filter docs (#50555) * Updates the description and adds a Lucene link * Adds analyze and custom analyzer snippets --- .../uppercase-tokenfilter.asciidoc | 101 +++++++++++++++++- 1 file changed, 99 insertions(+), 2 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc index c745f247ec3..06ea2c3279c 100644 --- a/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc @@ -4,5 +4,102 @@ Uppercase ++++ -A token filter of type `uppercase` that normalizes token text to upper -case. +Changes token text to uppercase. For example, you can use the `uppercase` filter +to change `the Lazy DoG` to `THE LAZY DOG`. + +This filter uses Lucene's +https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html[UpperCaseFilter]. + +[WARNING] +==== +Depending on the language, an uppercase character can map to multiple +lowercase characters. Using the `uppercase` filter could result in the loss of +lowercase character information. + +To avoid this loss but still have a consistent letter case, use the <<analysis-lowercase-tokenfilter,`lowercase`>> filter instead. 
+==== + +[[analysis-uppercase-tokenfilter-analyze-ex]] +==== Example + +The following <<indices-analyze,analyze API>> request uses the default +`uppercase` filter to change `the Quick FoX JUMPs` to uppercase: + +[source,console] +-------------------------------------------------- +GET _analyze +{ + "tokenizer" : "standard", + "filter" : ["uppercase"], + "text" : "the Quick FoX JUMPs" +} +-------------------------------------------------- + +The filter produces the following tokens: + +[source,text] +-------------------------------------------------- +[ THE, QUICK, FOX, JUMPS ] +-------------------------------------------------- + +///////////////////// +[source,console-result] +-------------------------------------------------- +{ + "tokens" : [ + { + "token" : "THE", + "start_offset" : 0, + "end_offset" : 3, + "type" : "<ALPHANUM>", + "position" : 0 + }, + { + "token" : "QUICK", + "start_offset" : 4, + "end_offset" : 9, + "type" : "<ALPHANUM>", + "position" : 1 + }, + { + "token" : "FOX", + "start_offset" : 10, + "end_offset" : 13, + "type" : "<ALPHANUM>", + "position" : 2 + }, + { + "token" : "JUMPS", + "start_offset" : 14, + "end_offset" : 19, + "type" : "<ALPHANUM>", + "position" : 3 + } + ] +} +-------------------------------------------------- +///////////////////// + +[[analysis-uppercase-tokenfilter-analyzer-ex]] +==== Add to an analyzer + +The following <<indices-create-index,create index API>> request uses the +`uppercase` filter to configure a new +<<analysis-custom-analyzer,custom analyzer>>. + +[source,console] +-------------------------------------------------- +PUT uppercase_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "whitespace_uppercase" : { + "tokenizer" : "whitespace", + "filter" : ["uppercase"] + } + } + } + } +} +--------------------------------------------------