From dd92830801a225cf2a878279e3c655e76cbe372b Mon Sep 17 00:00:00 2001
From: James Rodewig <james.rodewig@elastic.co>
Date: Mon, 11 Nov 2019 08:49:01 -0500
Subject: [PATCH] [DOCS] Reformat condition token filter (#48775)

---
 .../condition-tokenfilter.asciidoc | 180 ++++++++++++------
 1 file changed, 120 insertions(+), 60 deletions(-)

diff --git a/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc
index 17dc46faad8..3e81d1536ae 100644
--- a/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc
@@ -1,88 +1,148 @@
 [[analysis-condition-tokenfilter]]
-=== Conditional Token Filter
+=== Conditional token filter
+++++
+<titleabbrev>Conditional</titleabbrev>
+++++
 
-The conditional token filter takes a predicate script and a list of subfilters, and
-only applies the subfilters to the current token if it matches the predicate.
+Applies a set of token filters to tokens that match conditions in a provided
+predicate script.
 
-[float]
-=== Options
-[horizontal]
-filter:: a chain of token filters to apply to the current token if the predicate
-  matches. These can be any token filters defined elsewhere in the index mappings.
+This filter uses Lucene's
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.html[ConditionalTokenFilter].
 
-script:: a predicate script that determines whether or not the filters will be applied
-  to the current token. Note that only inline scripts are supported
+[[analysis-condition-analyze-ex]]
+==== Example
 
-[float]
-=== Settings example
-
-You can set it up like:
+The following <<indices-analyze,analyze API>> request uses the `condition`
+filter to match tokens with fewer than 5 characters in `THE QUICK BROWN FOX`.
+It then applies the <<analysis-lowercase-tokenfilter,`lowercase`>> filter to
+those matching tokens, converting them to lowercase.
 
 [source,console]
 --------------------------------------------------
-PUT /condition_example
+GET /_analyze
 {
-  "settings" : {
-    "analysis" : {
-      "analyzer" : {
-        "my_analyzer" : {
-          "tokenizer" : "standard",
-          "filter" : [ "my_condition" ]
-        }
-      },
-      "filter" : {
-        "my_condition" : {
-          "type" : "condition",
-          "filter" : [ "lowercase" ],
-          "script" : {
-            "source" : "token.getTerm().length() < 5" <1>
-          }
-        }
-      }
-    }
-  }
+  "tokenizer": "standard",
+  "filter": [
+    {
+      "type": "condition",
+      "filter": [ "lowercase" ],
+      "script": {
+        "source": "token.getTerm().length() < 5"
+      }
+    }
+  ],
+  "text": "THE QUICK BROWN FOX"
 }
 --------------------------------------------------
 
-<1> This will only apply the lowercase filter to terms that are less than 5
-characters in length
+The filter produces the following tokens:
 
-And test it like:
-
-[source,console]
+[source,text]
 --------------------------------------------------
-POST /condition_example/_analyze
-{
-  "analyzer" : "my_analyzer",
-  "text" : "What Flapdoodle"
-}
+[ the, QUICK, BROWN, fox ]
 --------------------------------------------------
-// TEST[continued]
-
-And it'd respond:
+
+/////////////////////
 
 [source,console-result]
 --------------------------------------------------
 {
-  "tokens": [
+  "tokens" : [
     {
-      "token": "what", <1>
-      "start_offset": 0,
-      "end_offset": 4,
-      "type": "<ALPHANUM>",
-      "position": 0
+      "token" : "the",
+      "start_offset" : 0,
+      "end_offset" : 3,
+      "type" : "<ALPHANUM>",
+      "position" : 0
     },
     {
-      "token": "Flapdoodle", <2>
-      "start_offset": 5,
-      "end_offset": 15,
-      "type": "<ALPHANUM>",
-      "position": 1
+      "token" : "QUICK",
+      "start_offset" : 4,
+      "end_offset" : 9,
+      "type" : "<ALPHANUM>",
+      "position" : 1
+    },
+    {
+      "token" : "BROWN",
+      "start_offset" : 10,
+      "end_offset" : 15,
+      "type" : "<ALPHANUM>",
+      "position" : 2
+    },
+    {
+      "token" : "fox",
+      "start_offset" : 16,
+      "end_offset" : 19,
+      "type" : "<ALPHANUM>",
+      "position" : 3
     }
   ]
 }
 --------------------------------------------------
+/////////////////////
 
-<1> The term `What` has been lowercased, because it is only 4 characters long
-<2> The term `Flapdoodle` has been left in its original case, because it doesn't pass
-   the predicate
+[[analysis-condition-tokenfilter-configure-parms]]
+==== Configurable parameters
+
+`filter`::
++
+--
+(Required, array of token filters)
+Array of token filters. If a token matches the predicate script in the `script`
+parameter, these filters are applied to the token in the order provided.
+
+These filters can include custom token filters defined in the index mapping.
+--
+
+`script`::
++
+--
+(Required, <<modules-scripting-using,script object>>)
+Predicate script used to apply token filters. If a token
+matches this script, the filters in the `filter` parameter are applied to the
+token.
+
+For valid parameters, see <<_script_parameters>>. Only inline scripts are
+supported. Painless scripts are executed in the
+{painless}/painless-analysis-predicate-context.html[analysis predicate context]
+and require a `token` property.
+--
+
+[[analysis-condition-tokenfilter-customize]]
+==== Customize and add to an analyzer
+
+To customize the `condition` filter, duplicate it to create the basis
+for a new custom token filter. You can modify the filter using its configurable
+parameters.
+
+For example, the following <<indices-create-index,create index API>> request
+uses a custom `condition` filter to configure a new
+<<analysis-custom-analyzer,custom analyzer>>. The custom `condition` filter
+matches the first token in a stream. It then reverses that matching token using
+the <<analysis-reverse-tokenfilter,`reverse`>> filter.
+
+[source,console]
+--------------------------------------------------
+PUT /palindrome_list
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "whitespace_reverse_first_token": {
+          "tokenizer": "whitespace",
+          "filter": [ "reverse_first_token" ]
+        }
+      },
+      "filter": {
+        "reverse_first_token": {
+          "type": "condition",
+          "filter": [ "reverse" ],
+          "script": {
+            "source": "token.getPosition() === 0"
+          }
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
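+
+To test the new analyzer, you can run it through the
+<<indices-analyze,analyze API>>. The request below is a minimal sketch: the
+`race a car` sample text and the token output shown after it are illustrative,
+assuming the `palindrome_list` index above has been created.
+
+[source,console]
+--------------------------------------------------
+GET /palindrome_list/_analyze
+{
+  "analyzer": "whitespace_reverse_first_token",
+  "text": "race a car"
+}
+--------------------------------------------------
+// TEST[continued]
+
+Only the first token, `race`, matches the `token.getPosition() === 0`
+predicate, so only it is reversed:
+
+[source,text]
+--------------------------------------------------
+[ ecar, a, car ]
+--------------------------------------------------
\ No newline at end of file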