diff --git a/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc
index 6ce084c183f..ca6b1d5115e 100644
--- a/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc
@@ -4,7 +4,147 @@
 Unique
 ++++
 
-The `unique` token filter can be used to only index unique tokens during
-analysis. By default it is applied on all the token stream. If
-`only_on_same_position` is set to `true`, it will only remove duplicate
-tokens on the same position.
+Removes duplicate tokens from a stream. For example, you can use the `unique`
+filter to change `the lazy lazy dog` to `the lazy dog`.
+
+If the `only_on_same_position` parameter is set to `true`, the `unique` filter
+removes only duplicate tokens _in the same position_.
+
+[NOTE]
+====
+When `only_on_same_position` is `true`, the `unique` filter works the same as
+the <<analysis-remove-duplicates-tokenfilter,`remove_duplicates`>> filter.
+====
+
+[[analysis-unique-tokenfilter-analyze-ex]]
+==== Example
+
+The following <<indices-analyze,analyze API>> request uses the `unique` filter
+to remove duplicate tokens from `the quick fox jumps the lazy fox`:
+
+[source,console]
+--------------------------------------------------
+GET _analyze
+{
+  "tokenizer" : "whitespace",
+  "filter" : ["unique"],
+  "text" : "the quick fox jumps the lazy fox"
+}
+--------------------------------------------------
+
+The filter removes duplicate tokens for `the` and `fox`, producing the
+following output:
+
+[source,text]
+--------------------------------------------------
+[ the, quick, fox, jumps, lazy ]
+--------------------------------------------------
+
+/////////////////////
+[source,console-result]
+--------------------------------------------------
+{
+  "tokens" : [
+    {
+      "token" : "the",
+      "start_offset" : 0,
+      "end_offset" : 3,
+      "type" : "word",
+      "position" : 0
+    },
+    {
+      "token" : "quick",
+      "start_offset" : 4,
+      "end_offset" : 9,
+      "type" : "word",
+      "position" : 1
+    },
+    {
+      "token" : "fox",
+      "start_offset" : 10,
+      "end_offset" : 13,
+      "type" : "word",
+      "position" : 2
+    },
+    {
+      "token" : "jumps",
+      "start_offset" : 14,
+      "end_offset" : 19,
+      "type" : "word",
+      "position" : 3
+    },
+    {
+      "token" : "lazy",
+      "start_offset" : 24,
+      "end_offset" : 28,
+      "type" : "word",
+      "position" : 4
+    }
+  ]
+}
+--------------------------------------------------
+/////////////////////
+
+[[analysis-unique-tokenfilter-analyzer-ex]]
+==== Add to an analyzer
+
+The following <<indices-create-index,create index API>> request uses the
+`unique` filter to configure a new <<analysis-custom-analyzer,custom analyzer>>.
+
+[source,console]
+--------------------------------------------------
+PUT custom_unique_example
+{
+  "settings" : {
+    "analysis" : {
+      "analyzer" : {
+        "standard_unique" : {
+          "tokenizer" : "standard",
+          "filter" : ["unique"]
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+
+[[analysis-unique-tokenfilter-configure-parms]]
+==== Configurable parameters
+
+`only_on_same_position`::
+(Optional, boolean)
+If `true`, only remove duplicate tokens in the same position.
+Defaults to `false`.
+
+[[analysis-unique-tokenfilter-customize]]
+==== Customize
+
+To customize the `unique` filter, duplicate it to create the basis
+for a new custom token filter. You can modify the filter using its configurable
+parameters.
+
+For example, the following request creates a custom `unique` filter with
+`only_on_same_position` set to `true`.
+
+[source,console]
+--------------------------------------------------
+PUT letter_unique_pos_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "letter_unique_pos": {
+          "tokenizer": "letter",
+          "filter": [ "unique_pos" ]
+        }
+      },
+      "filter": {
+        "unique_pos": {
+          "type": "unique",
+          "only_on_same_position": true
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
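+
+To see the effect of `only_on_same_position`, you could run the new analyzer
+with the <<indices-analyze,analyze API>>. As a minimal sketch, the following
+request should keep the repeated `the` and `fox` tokens, because the
+duplicates occur at different positions rather than in the same one:
+
+[source,console]
+--------------------------------------------------
+GET letter_unique_pos_example/_analyze
+{
+  "analyzer": "letter_unique_pos",
+  "text": "the quick fox jumps the lazy fox"
+}
+--------------------------------------------------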