From cf059378d1432fc1ec83317f8e0313e60c55e874 Mon Sep 17 00:00:00 2001 From: Clinton Gormley Date: Sat, 21 Jun 2014 18:42:38 +0200 Subject: [PATCH] Docs: Updated stop token filter docs --- .../tokenfilters/stop-tokenfilter.asciidoc | 87 ++++++++++++++----- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc index 14b3a32b2f8..67c113cd331 100644 --- a/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc @@ -7,29 +7,72 @@ streams. The following are settings that can be set for a `stop` token filter type: -[cols="<,<",options="header",] -|======================================================================= -|Setting |Description -|`stopwords` |A list of stop words to use. Defaults to english stop -words. +[horizontal] +`stopwords`:: -|`stopwords_path` |A path (either relative to `config` location, or -absolute) to a stopwords file configuration. Each stop word should be in -its own "line" (separated by a line break). The file must be UTF-8 -encoded. + A list of stop words to use. Defaults to `_english_` stop words. -|`ignore_case` |Set to `true` to lower case all words first. Defaults to -`false`. +`stopwords_path`:: -|`remove_trailing` |Set to `false` in order to not ignore the last term of -a search if it is a stop word. This is very useful for the completion -suggester as a query like `green a` can be extended to `green apple` even -though you remove stop words in general. Defaults to `true`. -|======================================================================= + A path (either relative to `config` location, or absolute) to a stopwords + file configuration. Each stop word should be in its own "line" (separated + by a line break). The file must be UTF-8 encoded. -stopwords allow for custom language specific expansion of default -stopwords. 
It follows the `_lang_` notation and supports: arabic, -armenian, basque, brazilian, bulgarian, catalan, czech, danish, dutch, -english, finnish, french, galician, german, greek, hindi, hungarian, -indonesian, italian, norwegian, persian, portuguese, romanian, russian, -spanish, swedish, turkish. +`ignore_case`:: + + Set to `true` to lower case all words first. Defaults to `false`. + +`remove_trailing`:: + + Set to `false` in order to not remove the last term of a search if it is a + stop word. This is very useful for the completion suggester as a query + like `green a` can be extended to `green apple` even though you remove + stop words in general. Defaults to `true`. + +The `stopwords` parameter accepts either an array of stopwords: + +[source,json] +------------------------------------ +PUT /my_index +{ + "settings": { + "analysis": { + "filter": { + "my_stop": { + "type": "stop", + "stopwords": ["and", "is", "the"] + } + } + } + } +} +------------------------------------ + +or a predefined language-specific list: + +[source,json] +------------------------------------ +PUT /my_index +{ + "settings": { + "analysis": { + "filter": { + "my_stop": { + "type": "stop", + "stopwords": "_english_" + } + } + } + } +} +------------------------------------ + +Elasticsearch provides predefined stop word lists for the following languages: + +`_arabic_`, `_armenian_`, `_basque_`, `_brazilian_`, `_bulgarian_`, +`_catalan_`, `_czech_`, `_danish_`, `_dutch_`, `_english_`, `_finnish_`, +`_french_`, `_galician_`, `_german_`, `_greek_`, `_hindi_`, `_hungarian_`, +`_indonesian_`, `_italian_`, `_norwegian_`, `_persian_`, `_portuguese_`, +`_romanian_`, `_russian_`, `_spanish_`, `_swedish_`, `_turkish_`. + +To use an empty stopwords list (that is, to disable stopwords), specify `_none_`.