2016-05-11 08:17:56 -04:00
|
|
|
[[configuring-analyzers]]
|
|
|
|
=== Configuring built-in analyzers
|
|
|
|
|
|
|
|
The built-in analyzers can be used directly without any configuration. Some
|
|
|
|
of them, however, support configuration options to alter their behavior. For
|
|
|
|
instance, the <<analysis-standard-analyzer,`standard` analyzer>> can be configured
|
|
|
|
to support a list of stop words:
|
|
|
|
|
2019-09-09 13:38:14 -04:00
|
|
|
[source,console]
|
2016-05-11 08:17:56 -04:00
|
|
|
--------------------------------
|
2020-07-27 15:58:26 -04:00
|
|
|
PUT my-index-000001
|
2016-05-11 08:17:56 -04:00
|
|
|
{
|
|
|
|
"settings": {
|
|
|
|
"analysis": {
|
|
|
|
"analyzer": {
|
|
|
|
"std_english": { <1>
|
|
|
|
"type": "standard",
|
|
|
|
"stopwords": "_english_"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"mappings": {
|
2019-01-22 09:13:52 -05:00
|
|
|
"properties": {
|
|
|
|
"my_text": {
|
|
|
|
"type": "text",
|
|
|
|
"analyzer": "standard", <2>
|
|
|
|
"fields": {
|
|
|
|
"english": {
|
|
|
|
"type": "text",
|
|
|
|
"analyzer": "std_english" <3>
|
2016-05-11 08:17:56 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-27 15:58:26 -04:00
|
|
|
POST my-index-000001/_analyze
|
2016-05-11 08:17:56 -04:00
|
|
|
{
|
|
|
|
"field": "my_text", <2>
|
|
|
|
"text": "The old brown cow"
|
|
|
|
}
|
|
|
|
|
2020-07-27 15:58:26 -04:00
|
|
|
POST my-index-000001/_analyze
|
2016-05-11 08:17:56 -04:00
|
|
|
{
|
|
|
|
"field": "my_text.english", <3>
|
|
|
|
"text": "The old brown cow"
|
|
|
|
}
|
|
|
|
|
|
|
|
--------------------------------
|
|
|
|
|
|
|
|
<1> We define the `std_english` analyzer to be based on the `standard`
|
|
|
|
analyzer, but configured to remove the pre-defined list of English stop words.
|
|
|
|
<2> The `my_text` field uses the `standard` analyzer directly, without
|
|
|
|
any configuration. No stop words will be removed from this field.
|
|
|
|
The resulting terms are: `[ the, old, brown, cow ]`
|
|
|
|
<3> The `my_text.english` field uses the `std_english` analyzer, so
|
|
|
|
English stop words will be removed. The resulting terms are:
|
|
|
|
`[ old, brown, cow ]`
|
|
|
|
|
2016-05-19 13:42:23 -04:00
|
|
|
|
|
|
|
/////////////////////
|
|
|
|
|
2019-09-06 09:22:08 -04:00
|
|
|
[source,console-result]
|
2016-05-19 13:42:23 -04:00
|
|
|
----------------------------
|
|
|
|
{
|
|
|
|
"tokens": [
|
|
|
|
{
|
|
|
|
"token": "old",
|
|
|
|
"start_offset": 4,
|
|
|
|
"end_offset": 7,
|
|
|
|
"type": "<ALPHANUM>",
|
|
|
|
"position": 1
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"token": "brown",
|
|
|
|
"start_offset": 8,
|
|
|
|
"end_offset": 13,
|
|
|
|
"type": "<ALPHANUM>",
|
|
|
|
"position": 2
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"token": "cow",
|
|
|
|
"start_offset": 14,
|
|
|
|
"end_offset": 17,
|
|
|
|
"type": "<ALPHANUM>",
|
|
|
|
"position": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
}
|
|
|
|
----------------------------
|
|
|
|
|
|
|
|
/////////////////////
|