mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-09 22:45:04 +00:00
5a2c6f0d4f
Plugin discovery documentation contained information about installing Elasticsearch 2.0 and installing an Oracle JDK, both of which are no longer valid. Because the instructions also used cleartext HTTP to install packages, this commit uses HTTPS links instead of HTTP where possible. In addition, a few community links have been removed, as they do not seem to exist anymore. Co-authored-by: Alexander Reelsen <alexander@reelsen.net>
113 lines
2.7 KiB
Plaintext
113 lines
2.7 KiB
Plaintext
[[analysis-stempel]]
|
|
=== Stempel Polish Analysis Plugin
|
|
|
|
The Stempel Analysis plugin integrates Lucene's Stempel analysis
|
|
module for Polish into Elasticsearch.
|
|
|
|
:plugin_name: analysis-stempel
|
|
include::install_remove.asciidoc[]
|
|
|
|
[[analysis-stempel-tokenizer]]
|
|
[discrete]
|
|
==== `stempel` tokenizer and token filters
|
|
|
|
The plugin provides the `polish` analyzer and the `polish_stem` and `polish_stop` token filters.
|
|
The analyzer and the `polish_stem` filter are not configurable; the `polish_stop` filter accepts a custom `stopwords` list.
|
|
|
|
==== Reimplementing and extending the analyzers
|
|
|
|
The `polish` analyzer could be reimplemented as a `custom` analyzer that can
|
|
then be extended and configured differently as follows:
|
|
|
|
[source,console]
|
|
----------------------------------------------------
|
|
PUT /stempel_example
|
|
{
|
|
"settings": {
|
|
"analysis": {
|
|
"analyzer": {
|
|
"rebuilt_stempel": {
|
|
"tokenizer": "standard",
|
|
"filter": [
|
|
"lowercase",
|
|
"polish_stop",
|
|
"polish_stem"
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----------------------------------------------------
|
|
// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: stempel_example, first: polish, second: rebuilt_stempel}\nendyaml\n/]
|
|
|
|
[[analysis-polish-stop]]
|
|
==== `polish_stop` token filter
|
|
|
|
The `polish_stop` token filter filters out Polish stopwords (`_polish_`), and
|
|
any other custom stopwords specified by the user. This filter only supports
|
|
the predefined `_polish_` stopwords list. If you want to use a different
|
|
predefined list, then use the
|
|
{ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead.
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
PUT /polish_stop_example
|
|
{
|
|
"settings": {
|
|
"index": {
|
|
"analysis": {
|
|
"analyzer": {
|
|
"analyzer_with_stop": {
|
|
"tokenizer": "standard",
|
|
"filter": [
|
|
"lowercase",
|
|
"polish_stop"
|
|
]
|
|
}
|
|
},
|
|
"filter": {
|
|
"polish_stop": {
|
|
"type": "polish_stop",
|
|
"stopwords": [
|
|
"_polish_",
|
|
"jeść"
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
GET polish_stop_example/_analyze
|
|
{
|
|
"analyzer": "analyzer_with_stop",
|
|
"text": "Gdzie kucharek sześć, tam nie ma co jeść."
|
|
}
|
|
--------------------------------------------------
|
|
|
|
The above request returns:
|
|
|
|
[source,console-result]
|
|
--------------------------------------------------
|
|
{
|
|
"tokens" : [
|
|
{
|
|
"token" : "kucharek",
|
|
"start_offset" : 6,
|
|
"end_offset" : 14,
|
|
"type" : "<ALPHANUM>",
|
|
"position" : 1
|
|
},
|
|
{
|
|
"token" : "sześć",
|
|
"start_offset" : 15,
|
|
"end_offset" : 20,
|
|
"type" : "<ALPHANUM>",
|
|
"position" : 2
|
|
}
|
|
]
|
|
}
|
|
--------------------------------------------------
|