From 06dc1fbd96cff77adf3baf15669906bc5bc50b18 Mon Sep 17 00:00:00 2001 From: James Rodewig Date: Wed, 23 Oct 2019 15:06:18 -0500 Subject: [PATCH] [DOCS] Reformat ASCII folding token filter docs (#48143) --- .../apostrophe-tokenfilter.asciidoc | 2 +- .../asciifolding-tokenfilter.asciidoc | 107 ++++++++++++++++-- 2 files changed, 98 insertions(+), 11 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc index ac64ef95423..330ab16b4ef 100644 --- a/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc @@ -8,7 +8,7 @@ Strips all characters after an apostrophe, including the apostrophe itself. This filter is included in {es}'s built-in <<turkish-analyzer,Turkish language analyzer>>. It uses Lucene's -https://lucene.apache.org/core/4_8_0/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFilter.html[ApostropheFilter], +https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFilter.html[ApostropheFilter], which was built for the Turkish language. diff --git a/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc index 49f1b9869b0..432f4709d8c 100644 --- a/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc @@ -1,10 +1,83 @@ [[analysis-asciifolding-tokenfilter]] -=== ASCII Folding Token Filter +=== ASCII folding token filter +++++ +<titleabbrev>ASCII folding</titleabbrev> +++++ -A token filter of type `asciifolding` that converts alphabetic, numeric, -and symbolic Unicode characters which are not in the first 127 ASCII -characters (the "Basic Latin" Unicode block) into their ASCII -equivalents, if one exists. 
Example: +Converts alphabetic, numeric, and symbolic characters that are not in the Basic +Latin Unicode block (first 127 ASCII characters) to their ASCII equivalent, if +one exists. For example, the filter changes `à` to `a`. + +This filter uses Lucene's +https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html[ASCIIFoldingFilter]. + +[[analysis-asciifolding-tokenfilter-analyze-ex]] +==== Example + +The following <<indices-analyze,analyze API>> request uses the `asciifolding` +filter to drop the diacritical marks in `açaí à la carte`: + +[source,console] +-------------------------------------------------- +GET /_analyze +{ + "tokenizer" : "standard", + "filter" : ["asciifolding"], + "text" : "açaí à la carte" +} +-------------------------------------------------- + +The filter produces the following tokens: + +[source,text] +-------------------------------------------------- +[ acai, a, la, carte ] +-------------------------------------------------- + +///////////////////// +[source,console-result] +-------------------------------------------------- +{ + "tokens" : [ + { + "token" : "acai", + "start_offset" : 0, + "end_offset" : 4, + "type" : "<ALPHANUM>", + "position" : 0 + }, + { + "token" : "a", + "start_offset" : 5, + "end_offset" : 6, + "type" : "<ALPHANUM>", + "position" : 1 + }, + { + "token" : "la", + "start_offset" : 7, + "end_offset" : 9, + "type" : "<ALPHANUM>", + "position" : 2 + }, + { + "token" : "carte", + "start_offset" : 10, + "end_offset" : 15, + "type" : "<ALPHANUM>", + "position" : 3 + } + ] +} +-------------------------------------------------- +///////////////////// + +[[analysis-asciifolding-tokenfilter-analyzer-ex]] +==== Add to an analyzer + +The following <<indices-create-index,create index API>> request uses the +`asciifolding` filter to configure a new +<<analysis-custom-analyzer,custom analyzer>>. 
[source,console] -------------------------------------------------- @@ -13,7 +86,7 @@ PUT /asciifold_example "settings" : { "analysis" : { "analyzer" : { - "default" : { + "standard_asciifolding" : { "tokenizer" : "standard", "filter" : ["asciifolding"] } @@ -23,9 +96,23 @@ PUT /asciifold_example } -------------------------------------------------- -Accepts `preserve_original` setting which defaults to false but if true -will keep the original token as well as emit the folded token. For -example: +[[analysis-asciifolding-tokenfilter-configure-parms]] +==== Configurable parameters + +`preserve_original`:: +(Optional, boolean) +If `true`, emit both original tokens and folded tokens. +Defaults to `false`. + +[[analysis-asciifolding-tokenfilter-customize]] +==== Customize + +To customize the `asciifolding` filter, duplicate it to create the basis +for a new custom token filter. You can modify the filter using its configurable +parameters. + +For example, the following request creates a custom `asciifolding` filter with +`preserve_original` set to true: [source,console] -------------------------------------------------- @@ -34,7 +121,7 @@ PUT /asciifold_example "settings" : { "analysis" : { "analyzer" : { - "default" : { + "standard_asciifolding" : { "tokenizer" : "standard", "filter" : ["my_ascii_folding"] }