From a9fdcadf01cb6a3c74a3c94ca54fdac4e3ed2872 Mon Sep 17 00:00:00 2001
From: Boaz Leskes
Date: Mon, 4 Nov 2013 17:41:43 +0100
Subject: [PATCH] [DOCS] Added documentation for the keep word token filter

---
 docs/reference/analysis/tokenfilters.asciidoc |  2 +
 .../keep-words-tokenfilter.asciidoc           | 49 +++++++++++++++++++
 .../normalization-tokenfilter.asciidoc        |  2 +
 3 files changed, 53 insertions(+)
 create mode 100644 docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc

diff --git a/docs/reference/analysis/tokenfilters.asciidoc b/docs/reference/analysis/tokenfilters.asciidoc
index c13a820dd98..ad72fb70ccb 100644
--- a/docs/reference/analysis/tokenfilters.asciidoc
+++ b/docs/reference/analysis/tokenfilters.asciidoc
@@ -70,3 +70,5 @@ include::tokenfilters/common-grams-tokenfilter.asciidoc[]
 include::tokenfilters/normalization-tokenfilter.asciidoc[]
 
 include::tokenfilters/delimited-payload-tokenfilter.asciidoc[]
+
+include::tokenfilters/keep-words-tokenfilter.asciidoc[]
diff --git a/docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc
new file mode 100644
index 00000000000..e4abbeff15d
--- /dev/null
+++ b/docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc
@@ -0,0 +1,49 @@
+[[analysis-keep-words-tokenfilter]]
+=== Keep Words Token Filter
+
+A token filter of type `keep` that only keeps tokens with text contained in a
+predefined set of words. The set of words can be defined in the settings or
+loaded from a text file containing one word per line.
+
+
+[float]
+=== Options
+[horizontal]
+keep_words:: a list of words to keep
+keep_words_path:: a path to a words file
+keep_words_case:: a boolean indicating whether to lowercase the words (defaults to `false`)
+
+
+
+[float]
+=== Settings example
+
+[source,js]
+--------------------------------------------------
+{
+    "index" : {
+        "analysis" : {
+            "analyzer" : {
+                "my_analyzer" : {
+                    "tokenizer" : "standard",
+                    "filter" : ["standard", "lowercase", "words_till_three"]
+                },
+                "my_analyzer1" : {
+                    "tokenizer" : "standard",
+                    "filter" : ["standard", "lowercase", "words_on_file"]
+                }
+            },
+            "filter" : {
+                "words_till_three" : {
+                    "type" : "keep",
+                    "keep_words" : [ "one", "two", "three" ]
+                },
+                "words_on_file" : {
+                    "type" : "keep",
+                    "keep_words_path" : "/path/to/word/file"
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
diff --git a/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc
index a62261f9e19..875187242ec 100644
--- a/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc
@@ -11,3 +11,5 @@ http://lucene.apache.org/core/4_3_1/analyzers-common/org/apache/lucene/analysis/
 or the
 http://lucene.apache.org/core/4_3_1/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizer.html[PersianNormalizer]
 documentation.
+
+*Note:* These filters are available since `0.90.2`.
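
Not part of the patch, but a quick way to sanity-check the new filter once an index has been created with the settings example above is the analyze API. The index name `test` and the sample text are illustrative only:

[source,sh]
--------------------------------------------------
# Assumes an index named "test" (illustrative) was created with the analysis
# settings shown in the patch. "my_analyzer" chains the standard tokenizer,
# lowercasing, and the "words_till_three" keep filter, so tokens outside the
# keep list ("four", "five") should be dropped from the response.
curl -XGET 'localhost:9200/test/_analyze?analyzer=my_analyzer' -d 'one four two five'
--------------------------------------------------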