CONSOLEify some more docs
And expand on the `stemmer_override` examples, including the file on disk and an example of specifying the rules inline. Relates to #18160
This commit is contained in:
parent
f927a2708d
commit
a783c6c85c
|
@ -73,9 +73,6 @@ buildRestTests.expectedUnconvertedCandidates = [
|
||||||
'reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc',
|
'reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc',
|
||||||
'reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc',
|
'reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc',
|
||||||
'reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc',
|
'reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc',
|
||||||
'reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc',
|
|
||||||
'reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc',
|
|
||||||
'reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc',
|
|
||||||
'reference/cat/snapshots.asciidoc',
|
'reference/cat/snapshots.asciidoc',
|
||||||
'reference/cat/templates.asciidoc',
|
'reference/cat/templates.asciidoc',
|
||||||
'reference/cat/thread_pool.asciidoc',
|
'reference/cat/thread_pool.asciidoc',
|
||||||
|
@ -140,6 +137,7 @@ integTestCluster {
|
||||||
configFile 'scripts/my_combine_script.painless'
|
configFile 'scripts/my_combine_script.painless'
|
||||||
configFile 'scripts/my_reduce_script.painless'
|
configFile 'scripts/my_reduce_script.painless'
|
||||||
configFile 'analysis/synonym.txt'
|
configFile 'analysis/synonym.txt'
|
||||||
|
configFile 'analysis/stemmer_override.txt'
|
||||||
configFile 'userdict_ja.txt'
|
configFile 'userdict_ja.txt'
|
||||||
configFile 'KeywordTokenizer.rbbi'
|
configFile 'KeywordTokenizer.rbbi'
|
||||||
// Whitelist reindexing from the local node so we can test it.
|
// Whitelist reindexing from the local node so we can test it.
|
||||||
|
|
|
@ -23,14 +23,14 @@ Read more about http://www.regular-expressions.info/catastrophic.html[pathologic
|
||||||
|
|
||||||
For instance, a pattern like:
|
For instance, a pattern like:
|
||||||
|
|
||||||
[source,js]
|
[source,text]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
"(([a-z]+)(\d*))"
|
"(([a-z]+)(\d*))"
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
when matched against:
|
when matched against:
|
||||||
|
|
||||||
[source,js]
|
[source,text]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
"abc123def456"
|
"abc123def456"
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -74,7 +74,7 @@ PUT test
|
||||||
|
|
||||||
When used to analyze the text
|
When used to analyze the text
|
||||||
|
|
||||||
[source,js]
|
[source,java]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
import static org.apache.commons.lang.StringEscapeUtils.escapeHtml
|
import static org.apache.commons.lang.StringEscapeUtils.escapeHtml
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -117,7 +117,7 @@ PUT test
|
||||||
|
|
||||||
When the above analyzer is used on an email address like:
|
When the above analyzer is used on an email address like:
|
||||||
|
|
||||||
[source,js]
|
[source,text]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
john-smith_123@foo-bar.com
|
john-smith_123@foo-bar.com
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -136,14 +136,14 @@ original token will be highlighted, not just the matching subset. For
|
||||||
instance, querying the above email address for `"smith"` would
|
instance, querying the above email address for `"smith"` would
|
||||||
highlight:
|
highlight:
|
||||||
|
|
||||||
[source,js]
|
[source,html]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
<em>john-smith_123@foo-bar.com</em>
|
<em>john-smith_123@foo-bar.com</em>
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
not:
|
not:
|
||||||
|
|
||||||
[source,js]
|
[source,html]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
john-<em>smith</em>_123@foo-bar.com
|
john-<em>smith</em>_123@foo-bar.com
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
|
@ -12,8 +12,9 @@ For example:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
PUT /my_index
|
||||||
{
|
{
|
||||||
"index" : {
|
"settings": {
|
||||||
"analysis" : {
|
"analysis" : {
|
||||||
"analyzer" : {
|
"analyzer" : {
|
||||||
"my_analyzer" : {
|
"my_analyzer" : {
|
||||||
|
@ -31,3 +32,4 @@ For example:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
// CONSOLE
|
||||||
|
|
|
@ -20,15 +20,60 @@ Here is an example:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
index :
|
PUT /my_index
|
||||||
analysis :
|
{
|
||||||
analyzer :
|
"settings": {
|
||||||
myAnalyzer :
|
"analysis" : {
|
||||||
type : custom
|
"analyzer" : {
|
||||||
tokenizer : standard
|
"my_analyzer" : {
|
||||||
filter : [lowercase, custom_stems, porter_stem]
|
"tokenizer" : "standard",
|
||||||
filter:
|
"filter" : ["lowercase", "custom_stems", "porter_stem"]
|
||||||
custom_stems:
|
}
|
||||||
type: stemmer_override
|
},
|
||||||
rules_path : analysis/custom_stems.txt
|
"filter" : {
|
||||||
|
"custom_stems" : {
|
||||||
|
"type" : "stemmer_override",
|
||||||
|
"rules_path" : "analysis/stemmer_override.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
// CONSOLE
|
||||||
|
|
||||||
|
Where the file looks like:
|
||||||
|
|
||||||
|
[source,stemmer_override]
|
||||||
|
--------------------------------------------------
|
||||||
|
include::{docdir}/../src/test/cluster/config/analysis/stemmer_override.txt[]
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
You can also define the override rules inline:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT /my_index
|
||||||
|
{
|
||||||
|
"settings": {
|
||||||
|
"analysis" : {
|
||||||
|
"analyzer" : {
|
||||||
|
"my_analyzer" : {
|
||||||
|
"tokenizer" : "standard",
|
||||||
|
"filter" : ["lowercase", "custom_stems", "porter_stem"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filter" : {
|
||||||
|
"custom_stems" : {
|
||||||
|
"type" : "stemmer_override",
|
||||||
|
"rules" : [
|
||||||
|
"running => run",
|
||||||
|
"stemmer => stemmer"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// CONSOLE
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
running => run
|
||||||
|
|
||||||
|
stemmer => stemmer
|
Loading…
Reference in New Issue