Mirror of https://github.com/honeymoose/OpenSearch.git (synced 2025-02-17 10:25:15 +00:00)
CONSOLEify analysis docs
Converts the analysis docs that were marked as `json` into `CONSOLE` format. A few of them were in YAML but marked as `json` for historical reasons. I added more complete examples for a few of the less obvious sounding ones. Relates to #18160
This commit is contained in:
parent 01b807f98e · commit ad69503dce
@@ -53,18 +53,6 @@ buildRestTests.expectedUnconvertedCandidates = [
'reference/aggregations/pipeline/serial-diff-aggregation.asciidoc',
'reference/aggregations/pipeline/stats-bucket-aggregation.asciidoc',
'reference/aggregations/pipeline/sum-bucket-aggregation.asciidoc',
'reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/common-grams-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/compound-word-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/elision-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/keep-types-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/keyword-marker-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc',
'reference/cat/snapshots.asciidoc',
'reference/cat/templates.asciidoc',
'reference/cat/thread_pool.asciidoc',
@@ -124,10 +112,14 @@ integTestCluster {
configFile 'scripts/my_map_script.painless'
configFile 'scripts/my_combine_script.painless'
configFile 'scripts/my_reduce_script.painless'
configFile 'analysis/example_word_list.txt'
configFile 'analysis/hyphenation_patterns.xml'
configFile 'analysis/synonym.txt'
configFile 'analysis/stemmer_override.txt'
configFile 'userdict_ja.txt'
configFile 'KeywordTokenizer.rbbi'
extraConfigFile 'hunspell/en_US/en_US.aff', '../core/src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.aff'
extraConfigFile 'hunspell/en_US/en_US.dic', '../core/src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.dic'
// Whitelist reindexing from the local node so we can test it.
setting 'reindex.remote.whitelist', '127.0.0.1:*'
}

@@ -8,17 +8,21 @@ equivalents, if one exists. Example:

[source,js]
--------------------------------------------------
"index" : {
"analysis" : {
"analyzer" : {
"default" : {
"tokenizer" : "standard",
"filter" : ["standard", "asciifolding"]
PUT /asciifold_example
{
"settings" : {
"analysis" : {
"analyzer" : {
"default" : {
"tokenizer" : "standard",
"filter" : ["standard", "asciifolding"]
}
}
}
}
}
--------------------------------------------------
// CONSOLE

Accepts `preserve_original` setting which defaults to false but if true
will keep the original token as well as emit the folded token. For
@@ -26,20 +30,24 @@ example:

[source,js]
--------------------------------------------------
"index" : {
"analysis" : {
"analyzer" : {
"default" : {
"tokenizer" : "standard",
"filter" : ["standard", "my_ascii_folding"]
}
},
"filter" : {
"my_ascii_folding" : {
"type" : "asciifolding",
"preserve_original" : true
PUT /asciifold_example
{
"settings" : {
"analysis" : {
"analyzer" : {
"default" : {
"tokenizer" : "standard",
"filter" : ["standard", "my_ascii_folding"]
}
},
"filter" : {
"my_ascii_folding" : {
"type" : "asciifolding",
"preserve_original" : true
}
}
}
}
}
--------------------------------------------------
// CONSOLE

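You could test the folding with the `_analyze` API, for example (an illustrative request, not part of the original example, that assumes the `asciifold_example` index and `default` analyzer defined above; the sample text is arbitrary and the response is omitted):

[source,js]
--------------------------------------------------
POST /asciifold_example/_analyze
{
  "analyzer" : "default",
  "text" : "déjà vu"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
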
@@ -16,8 +16,9 @@ Bigrams are generated for characters in `han`, `hiragana`, `katakana` and

[source,js]
--------------------------------------------------
PUT /cjk_bigram_example
{
"index" : {
"settings" : {
"analysis" : {
"analyzer" : {
"han_bigrams" : {
@@ -40,3 +41,4 @@ Bigrams are generated for characters in `han`, `hiragana`, `katakana` and
}
}
--------------------------------------------------
// CONSOLE

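You could exercise the `han_bigrams` analyzer with an `_analyze` request along these lines (an illustrative sketch assuming the `cjk_bigram_example` index above; the sample text is arbitrary and the response is omitted):

[source,js]
--------------------------------------------------
POST /cjk_bigram_example/_analyze
{
  "analyzer" : "han_bigrams",
  "text" : "東京タワー"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
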
@@ -41,21 +41,33 @@ Here is an example:

[source,js]
--------------------------------------------------
index :
analysis :
analyzer :
index_grams :
tokenizer : whitespace
filter : [common_grams]
search_grams :
tokenizer : whitespace
filter : [common_grams_query]
filter :
common_grams :
type : common_grams
common_words: [a, an, the]
common_grams_query :
type : common_grams
query_mode: true
common_words: [a, an, the]
PUT /common_grams_example
{
"settings": {
"analysis": {
"my_analyzer": {
|
||||
"index_grams": {
|
||||
"tokenizer": "whitespace",
|
||||
"filter": ["common_grams"]
|
||||
},
|
||||
"search_grams": {
|
||||
"tokenizer": "whitespace",
|
||||
"filter": ["common_grams_query"]
|
||||
}
|
||||
},
|
||||
"filter": {
|
||||
"common_grams": {
|
||||
"type": "common_grams",
|
||||
"common_words": ["a", "an", "the"]
|
||||
},
|
||||
"common_grams_query": {
|
||||
"type": "common_grams",
|
||||
"query_mode": true,
|
||||
"common_words": ["a", "an", "the"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
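You could then compare index-time and query-time behaviour with an `_analyze` request like this one (illustrative only, assuming the `common_grams_example` index above; swap in `search_grams` to see the query-time output, and note the response is omitted):

[source,js]
--------------------------------------------------
POST /common_grams_example/_analyze
{
  "analyzer" : "index_grams",
  "text" : "the quick brown fox"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
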
@@ -1,5 +1,5 @@
[[analysis-compound-word-tokenfilter]]
=== Compound Word Token Filter
=== Compound Word Token Filters

The `hyphenation_decompounder` and `dictionary_decompounder` token filters can
decompose compound words found in many German languages into word parts.
@@ -84,20 +84,31 @@ Here is an example:

[source,js]
--------------------------------------------------
index :
analysis :
analyzer :
myAnalyzer2 :
type : custom
tokenizer : standard
filter : [myTokenFilter1, myTokenFilter2]
filter :
myTokenFilter1 :
type : dictionary_decompounder
word_list: [one, two, three]
myTokenFilter2 :
type : hyphenation_decompounder
word_list_path: path/to/words.txt
hyphenation_patterns_path: path/to/fop.xml
max_subword_size : 22
PUT /compound_word_example
{
"index": {
"analysis": {
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["dictionary_decompounder", "hyphenation_decompounder"]
}
},
"filter": {
"dictionary_decompounder": {
"type": "dictionary_decompounder",
"word_list": ["one", "two", "three"]
},
"hyphenation_decompounder": {
"type" : "hyphenation_decompounder",
"word_list_path": "analysis/example_word_list.txt",
"hyphenation_patterns_path": "analysis/hyphenation_patterns.xml",
"max_subword_size": 22
}
}
}
}
}
--------------------------------------------------
// CONSOLE

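You could check the decompounding with an `_analyze` request such as the following (an illustrative sketch assuming the `compound_word_example` index and `my_analyzer` above; the compound word is made up to match the `["one", "two", "three"]` dictionary and the response is omitted):

[source,js]
--------------------------------------------------
POST /compound_word_example/_analyze
{
  "analyzer" : "my_analyzer",
  "text" : "onetwothree"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
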
@@ -9,20 +9,24 @@ example:

[source,js]
--------------------------------------------------
"index" : {
"analysis" : {
"analyzer" : {
"default" : {
"tokenizer" : "standard",
"filter" : ["standard", "elision"]
}
},
"filter" : {
"elision" : {
"type" : "elision",
"articles" : ["l", "m", "t", "qu", "n", "s", "j"]
PUT /elision_example
{
"settings" : {
"analysis" : {
"analyzer" : {
"default" : {
"tokenizer" : "standard",
"filter" : ["standard", "elision"]
}
},
"filter" : {
"elision" : {
"type" : "elision",
"articles" : ["l", "m", "t", "qu", "n", "s", "j"]
}
}
}
}
}
--------------------------------------------------
// CONSOLE

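You could verify that the leading articles are stripped with a request like this (illustrative only, assuming the `elision_example` index and `default` analyzer above; the response is omitted):

[source,js]
--------------------------------------------------
POST /elision_example/_analyze
{
  "analyzer" : "default",
  "text" : "j'aime l'odeur"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
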
@@ -10,7 +10,7 @@ one or more `*.dic` files (all of which will automatically be picked up).
For example, assuming the default hunspell location is used, the
following directory layout will define the `en_US` dictionary:

[source,js]
[source,txt]
--------------------------------------------------
- conf
|-- hunspell
@@ -42,24 +42,28 @@ settings:

[source,js]
--------------------------------------------------
PUT /hunspell_example
{
"analysis" : {
"analyzer" : {
"en" : {
"tokenizer" : "standard",
"filter" : [ "lowercase", "en_US" ]
}
},
"filter" : {
"en_US" : {
"type" : "hunspell",
"locale" : "en_US",
"dedup" : true
"settings": {
"analysis" : {
"analyzer" : {
"en" : {
"tokenizer" : "standard",
"filter" : [ "lowercase", "en_US" ]
}
},
"filter" : {
"en_US" : {
"type" : "hunspell",
"locale" : "en_US",
"dedup" : true
}
}
}
}
}
--------------------------------------------------
// CONSOLE

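You could test the stemming with a request along these lines (an illustrative sketch that assumes the `hunspell_example` index above and an installed `en_US` dictionary; the sample text is arbitrary and the response is omitted):

[source,js]
--------------------------------------------------
POST /hunspell_example/_analyze
{
  "analyzer" : "en",
  "text" : "the foxes are jumping"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
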
The hunspell token filter accepts four options:

@@ -1,7 +1,7 @@
[[analysis-keep-types-tokenfilter]]
=== Keep Types Token Filter

A token filter of type `keep_types` that only keeps tokens with a token type
A token filter of type `keep_types` that only keeps tokens with a token type
contained in a predefined set.

@@ -14,24 +14,61 @@ types:: a list of types to keep
[float]
=== Settings example

You can set it up like:

[source,js]
--------------------------------------------------
PUT /keep_types_example
{
"index" : {
"settings" : {
"analysis" : {
"analyzer" : {
"my_analyzer" : {
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "extract_numbers"]
},
}
},
"filter" : {
"extract_numbers" : {
"type" : "keep_types",
"types" : [ "<NUM>" ]
},
}
}
}
}
}
--------------------------------------------------
// CONSOLE

And test it like:

[source,js]
--------------------------------------------------
POST /keep_types_example/_analyze
{
"analyzer" : "my_analyzer",
"text" : "this is just 1 a test"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

And it'd respond:

[source,js]
--------------------------------------------------
{
"tokens": [
{
"token": "1",
"start_offset": 13,
"end_offset": 14,
"type": "<NUM>",
"position": 3
}
]
}
--------------------------------------------------
// TESTRESPONSE

Note how only the `<NUM>` token is in the output.

@@ -20,17 +20,18 @@ keep_words_case:: a boolean indicating whether to lower case the words (defaults

[source,js]
--------------------------------------------------
PUT /keep_words_example
{
"index" : {
"settings" : {
"analysis" : {
"analyzer" : {
"my_analyzer" : {
"example_1" : {
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "words_till_three"]
},
"my_analyzer1" : {
"example_2" : {
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "words_on_file"]
"filter" : ["standard", "lowercase", "words_in_file"]
}
},
"filter" : {
@@ -38,12 +39,13 @@ keep_words_case:: a boolean indicating whether to lower case the words (defaults
"type" : "keep",
"keep_words" : [ "one", "two", "three"]
},
"words_on_file" : {
"words_in_file" : {
"type" : "keep",
"keep_words_path" : "/path/to/word/file"
"keep_words_path" : "analysis/example_word_list.txt"
}
}
}
}
}
--------------------------------------------------
// CONSOLE

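You could test the inline word list with a request like this one (illustrative only, assuming the `keep_words_example` index and the `example_1` analyzer above; only `one`, `two` and `three` should survive the filter, and the response is omitted):

[source,js]
--------------------------------------------------
POST /keep_words_example/_analyze
{
  "analyzer" : "example_1",
  "text" : "one four two five three"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
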
@@ -19,19 +19,124 @@ in the text.
`false`.
|=======================================================================

Here is an example:
You can configure it like:

[source,js]
--------------------------------------------------
index :
analysis :
analyzer :
myAnalyzer :
type : custom
tokenizer : standard
filter : [lowercase, protwords, porter_stem]
filter :
protwords :
type : keyword_marker
keywords_path : analysis/protwords.txt
PUT /keyword_marker_example
{
"settings": {
"analysis": {
"analyzer": {
"protect_cats": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "protect_cats", "porter_stem"]
},
"normal": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "porter_stem"]
}
},
"filter": {
"protect_cats": {
"type": "keyword_marker",
"keywords": ["cats"]
}
}
}
}
}
--------------------------------------------------
// CONSOLE

And test it with:

[source,js]
--------------------------------------------------
POST /keyword_marker_example/_analyze
{
"analyzer" : "protect_cats",
"text" : "I like cats"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

And it'd respond:

[source,js]
--------------------------------------------------
{
"tokens": [
{
"token": "i",
"start_offset": 0,
"end_offset": 1,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "like",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "cats",
"start_offset": 7,
"end_offset": 11,
"type": "<ALPHANUM>",
"position": 2
}
]
}
--------------------------------------------------
// TESTRESPONSE

As compared to the `normal` analyzer which has `cats` stemmed to `cat`:

[source,js]
--------------------------------------------------
POST /keyword_marker_example/_analyze
{
"analyzer" : "normal",
"text" : "I like cats"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

Response:

[source,js]
--------------------------------------------------
{
"tokens": [
{
"token": "i",
"start_offset": 0,
"end_offset": 1,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "like",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "cat",
"start_offset": 7,
"end_offset": 11,
"type": "<ALPHANUM>",
"position": 2
}
]
}
--------------------------------------------------
// TESTRESPONSE

@@ -9,18 +9,85 @@ subsequent stemmer will be indexed twice. Therefore, consider adding a
`unique` filter with `only_on_same_position` set to `true` to drop
unnecessary duplicates.

Here is an example:
Here is an example of using the `keyword_repeat` token filter to
preserve both the stemmed and unstemmed version of tokens:

[source,js]
--------------------------------------------------
index :
analysis :
analyzer :
myAnalyzer :
type : custom
tokenizer : standard
filter : [lowercase, keyword_repeat, porter_stem, unique_stem]
unique_stem:
type: unique
only_on_same_position : true
PUT /keyword_repeat_example
{
"settings": {
"analysis": {
"analyzer": {
"stemmed_and_unstemmed": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "keyword_repeat", "porter_stem", "unique_stem"]
}
},
"filter": {
"unique_stem": {
"type": "unique",
"only_on_same_position": true
}
}
}
}
}
--------------------------------------------------
// CONSOLE

And you can test it with:

[source,js]
--------------------------------------------------
POST /keyword_repeat_example/_analyze
{
"analyzer" : "stemmed_and_unstemmed",
"text" : "I like cats"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

And it'd respond:

[source,js]
--------------------------------------------------
{
"tokens": [
{
"token": "i",
"start_offset": 0,
"end_offset": 1,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "like",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "cats",
"start_offset": 7,
"end_offset": 11,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "cat",
"start_offset": 7,
"end_offset": 11,
"type": "<ALPHANUM>",
"position": 2
}
]
}
--------------------------------------------------
// TESTRESPONSE

Which preserves both the `cat` and `cats` tokens. Compare this to the example
on the <<analysis-keyword-marker-tokenfilter>>.

@@ -18,15 +18,25 @@ Here is an example:

[source,js]
--------------------------------------------------
index :
analysis :
analyzer :
myAnalyzer :
type : custom
tokenizer : standard
filter : [lowercase, five_token_limit]
filter :
five_token_limit :
type : limit
max_token_count : 5
PUT /limit_example
{
"settings": {
"analysis": {
"analyzer": {
"limit_example": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "five_token_limit"]
}
},
"filter": {
"five_token_limit": {
"type": "limit",
"max_token_count": 5
}
}
}
}
}
--------------------------------------------------
// CONSOLE

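You could confirm that only the first five tokens are kept with a request such as this one (an illustrative sketch assuming the `limit_example` index and analyzer above; the sample text is arbitrary and the response is omitted):

[source,js]
--------------------------------------------------
POST /limit_example/_analyze
{
  "analyzer" : "limit_example",
  "text" : "one two three four five six seven"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
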
@@ -10,28 +10,30 @@ custom analyzer

[source,js]
--------------------------------------------------
index :
analysis :
analyzer :
myAnalyzer2 :
type : custom
tokenizer : myTokenizer1
filter : [myTokenFilter1, myGreekLowerCaseFilter]
char_filter : [my_html]
tokenizer :
myTokenizer1 :
type : standard
max_token_length : 900
filter :
myTokenFilter1 :
type : stop
stopwords : [stop1, stop2, stop3, stop4]
myGreekLowerCaseFilter :
type : lowercase
language : greek
char_filter :
my_html :
type : html_strip
escaped_tags : [xxx, yyy]
read_ahead : 1024
PUT /lowercase_example
{
"settings": {
"analysis": {
"analyzer": {
"standard_lowercase_example": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase"]
},
"greek_lowercase_example": {
"type": "custom",
"tokenizer": "standard",
"filter": ["greek_lowercase"]
}
},
"filter": {
"greek_lowercase": {
"type": "lowercase",
"language": "greek"
}
}
}
}
}
--------------------------------------------------
// CONSOLE

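You could compare the two analyzers with an `_analyze` request like this one (illustrative only, assuming the `lowercase_example` index above; swap in `standard_lowercase_example` for the default lowercasing, and note the sample text is arbitrary and the response is omitted):

[source,js]
--------------------------------------------------
POST /lowercase_example/_analyze
{
  "analyzer" : "greek_lowercase_example",
  "text" : "ΑΘΗΝΑ"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
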
@@ -0,0 +1,4 @@
test
list
of
words
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE hyphenation-info SYSTEM "hyphenation.dtd">

<!-- Example hyphenation patterns file. -->

<hyphenation-info>

<hyphen-char value="-"/>
<hyphen-min before="2" after="2"/>

<classes>
aA
</classes>

<exceptions>
</exceptions>

<patterns>
.a2
</patterns>
</hyphenation-info>