[Analysis] Add missing docs for latvian analysis
This commit is contained in:
parent
8d3dd61b21
commit
395744b0d2
|
@ -25,6 +25,7 @@ following types are supported:
|
|||
<<indonesian-analyzer,`indonesian`>>,
|
||||
<<irish-analyzer,`irish`>>,
|
||||
<<italian-analyzer,`italian`>>,
|
||||
<<latvian-analyzer,`latvian`>>,
|
||||
<<norwegian-analyzer,`norwegian`>>,
|
||||
<<persian-analyzer,`persian`>>,
|
||||
<<portuguese-analyzer,`portuguese`>>,
|
||||
|
@ -56,7 +57,7 @@ with the `keywords` set to the value of the `stem_exclusion` parameter.
|
|||
The following analyzers support setting custom `stem_exclusion` list:
|
||||
`arabic`, `armenian`, `basque`, `bulgarian`, `catalan`,
|
||||
`czech`, `dutch`, `english`, `finnish`, `french`, `galician`,
|
||||
`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
|
||||
`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `latvian`, `norwegian`,
|
||||
`portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `turkish`.
|
||||
|
||||
==== Reimplementing language analyzers
|
||||
|
@ -1047,6 +1048,50 @@ The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:
|
|||
<2> This filter should be removed unless there are words which should
|
||||
be excluded from stemming.
|
||||
|
||||
[[latvian-analyzer]]
|
||||
===== `latvian` analyzer
|
||||
|
||||
The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows:
|
||||
|
||||
[source,js]
|
||||
----------------------------------------------------
|
||||
{
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"filter": {
|
||||
"latvian_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_latvian_" <1>
|
||||
},
|
||||
"latvian_keywords": {
|
||||
"type": "keyword_marker",
|
||||
"keywords": [] <2>
|
||||
},
|
||||
"latvian_stemmer": {
|
||||
"type": "stemmer",
|
||||
"language": "latvian"
|
||||
}
|
||||
},
|
||||
"analyzer": {
|
||||
"latvian": {
|
||||
"tokenizer": "standard",
|
||||
"filter": [
|
||||
"lowercase",
|
||||
"latvian_stop",
|
||||
"latvian_keywords",
|
||||
"latvian_stemmer"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
----------------------------------------------------
|
||||
<1> The default stopwords can be overridden with the `stopwords`
|
||||
or `stopwords_path` parameters.
|
||||
<2> This filter should be removed unless there are words which should
|
||||
be excluded from stemming.
|
||||
|
||||
[[norwegian-analyzer]]
|
||||
===== `norwegian` analyzer
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
|||
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
||||
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
||||
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
||||
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
|
||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
||||
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
||||
|
@ -139,6 +140,7 @@ public class Analysis {
|
|||
.put("_indonesian_", IndonesianAnalyzer.getDefaultStopSet())
|
||||
.put("_irish_", IrishAnalyzer.getDefaultStopSet())
|
||||
.put("_italian_", ItalianAnalyzer.getDefaultStopSet())
|
||||
.put("_latvian_", LatvianAnalyzer.getDefaultStopSet())
|
||||
.put("_norwegian_", NorwegianAnalyzer.getDefaultStopSet())
|
||||
.put("_persian_", PersianAnalyzer.getDefaultStopSet())
|
||||
.put("_portuguese_", PortugueseAnalyzer.getDefaultStopSet())
|
||||
|
|
Loading…
Reference in New Issue