[Analysis] Add missing docs for latvian analysis

This commit is contained in:
Robert Muir 2014-09-02 19:22:31 -04:00
parent 8d3dd61b21
commit 395744b0d2
2 changed files with 48 additions and 1 deletions

View File

@ -25,6 +25,7 @@ following types are supported:
<<indonesian-analyzer,`indonesian`>>,
<<irish-analyzer,`irish`>>,
<<italian-analyzer,`italian`>>,
<<latvian-analyzer,`latvian`>>,
<<norwegian-analyzer,`norwegian`>>,
<<persian-analyzer,`persian`>>,
<<portuguese-analyzer,`portuguese`>>,
@ -56,7 +57,7 @@ with the `keywords` set to the value of the `stem_exclusion` parameter.
The following analyzers support setting a custom `stem_exclusion` list:
`arabic`, `armenian`, `basque`, `catalan`, `bulgarian`,
`czech`, `finnish`, `dutch`, `english`, `french`, `galician`,
`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `latvian`, `norwegian`,
`portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `turkish`.
==== Reimplementing language analyzers
@ -1047,6 +1048,50 @@ The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:
<2> This filter should be removed unless there are words which should
be excluded from stemming.
[[latvian-analyzer]]
===== `latvian` analyzer
The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows:
[source,js]
----------------------------------------------------
{
"settings": {
"analysis": {
"filter": {
"latvian_stop": {
"type": "stop",
"stopwords": "_latvian_" <1>
},
"latvian_keywords": {
"type": "keyword_marker",
"keywords": [] <2>
},
"latvian_stemmer": {
"type": "stemmer",
"language": "latvian"
}
},
"analyzer": {
"latvian": {
"tokenizer": "standard",
"filter": [
"lowercase",
"latvian_stop",
"latvian_keywords",
"latvian_stemmer"
]
}
}
}
}
}
----------------------------------------------------
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.
[[norwegian-analyzer]]
===== `norwegian` analyzer

View File

@ -46,6 +46,7 @@ import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
@ -139,6 +140,7 @@ public class Analysis {
.put("_indonesian_", IndonesianAnalyzer.getDefaultStopSet())
.put("_irish_", IrishAnalyzer.getDefaultStopSet())
.put("_italian_", ItalianAnalyzer.getDefaultStopSet())
.put("_latvian_", LatvianAnalyzer.getDefaultStopSet())
.put("_norwegian_", NorwegianAnalyzer.getDefaultStopSet())
.put("_persian_", PersianAnalyzer.getDefaultStopSet())
.put("_portuguese_", PortugueseAnalyzer.getDefaultStopSet())