diff --git a/docs/build.gradle b/docs/build.gradle index 769e59a1a7c..f8e5ff0dc25 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -73,9 +73,6 @@ buildRestTests.expectedUnconvertedCandidates = [ 'reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc', - 'reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc', - 'reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc', - 'reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc', 'reference/cat/snapshots.asciidoc', 'reference/cat/templates.asciidoc', 'reference/cat/thread_pool.asciidoc', @@ -140,6 +137,7 @@ integTestCluster { configFile 'scripts/my_combine_script.painless' configFile 'scripts/my_reduce_script.painless' configFile 'analysis/synonym.txt' + configFile 'analysis/stemmer_override.txt' configFile 'userdict_ja.txt' configFile 'KeywordTokenizer.rbbi' // Whitelist reindexing from the local node so we can test it. 
diff --git a/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc index 177c4195bbf..4dac79b6571 100644 --- a/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc @@ -23,14 +23,14 @@ Read more about http://www.regular-expressions.info/catastrophic.html[pathologic For instance a pattern like : -[source,js] +[source,text] -------------------------------------------------- "(([a-z]+)(\d*))" -------------------------------------------------- when matched against: -[source,js] +[source,text] -------------------------------------------------- "abc123def456" -------------------------------------------------- @@ -74,7 +74,7 @@ PUT test When used to analyze the text -[source,js] +[source,java] -------------------------------------------------- import static org.apache.commons.lang.StringEscapeUtils.escapeHtml -------------------------------------------------- @@ -117,7 +117,7 @@ PUT test When the above analyzer is used on an email address like: -[source,js] +[source,text] -------------------------------------------------- john-smith_123@foo-bar.com -------------------------------------------------- @@ -136,14 +136,14 @@ original token will be highlighted, not just the matching subset. 
For instance, querying the above email address for `"smith"` would highlight: -[source,js] +[source,html] -------------------------------------------------- <em>john-smith_123@foo-bar.com</em> -------------------------------------------------- not: -[source,js] +[source,html] -------------------------------------------------- john-<em>smith</em>_123@foo-bar.com -------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc index 6042642027c..93e1eed26b4 100644 --- a/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc @@ -12,8 +12,9 @@ For example: [source,js] -------------------------------------------------- +PUT /my_index { - "index" : { + "settings": { "analysis" : { "analyzer" : { "my_analyzer" : { @@ -31,3 +32,4 @@ For example: } } -------------------------------------------------- +// CONSOLE diff --git a/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc index 6e010894f41..33191805fe6 100644 --- a/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc @@ -20,15 +20,60 @@ Here is an example: [source,js] -------------------------------------------------- -index : - analysis : - analyzer : - myAnalyzer : - type : custom - tokenizer : standard - filter : [lowercase, custom_stems, porter_stem] - filter: - custom_stems: - type: stemmer_override - rules_path : analysis/custom_stems.txt +PUT /my_index +{ + "settings": { + "analysis" : { + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "standard", + "filter" : ["lowercase", "custom_stems", "porter_stem"] + } + }, + "filter" : { + "custom_stems" : { + "type" : "stemmer_override", + "rules_path" : 
"analysis/stemmer_override.txt" + } + } + } + } +} -------------------------------------------------- +// CONSOLE + +Where the file looks like: + +[source,stemmer_override] +-------------------------------------------------- +include::{docdir}/../src/test/cluster/config/analysis/stemmer_override.txt[] +-------------------------------------------------- + +You can also define the override rules inline: + +[source,js] +-------------------------------------------------- +PUT /my_index +{ + "settings": { + "analysis" : { + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "standard", + "filter" : ["lowercase", "custom_stems", "porter_stem"] + } + }, + "filter" : { + "custom_stems" : { + "type" : "stemmer_override", + "rules" : [ + "running => run", + "stemmer => stemmer" + ] + } + } + } + } +} +-------------------------------------------------- +// CONSOLE diff --git a/docs/src/test/cluster/config/analysis/stemmer_override.txt b/docs/src/test/cluster/config/analysis/stemmer_override.txt new file mode 100644 index 00000000000..6f6cd771cf5 --- /dev/null +++ b/docs/src/test/cluster/config/analysis/stemmer_override.txt @@ -0,0 +1,3 @@ +running => run + +stemmer => stemmer