[Analysis] Add missing docs for latvian analysis

2014-09-02 19:22:31 -04:00 · 2014-09-02 19:22:31 -04:00 · 395744b0d2
parent 8d3dd61b21
commit 395744b0d2
2 changed files with 48 additions and 1 deletions
--- a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
+++ b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
@ -25,6 +25,7 @@ following types are supported:
 <<indonesian-analyzer,`indonesian`>>,
 <<irish-analyzer,`irish`>>,
 <<italian-analyzer,`italian`>>,
+<<latvian-analyzer,`latvian`>>,
 <<norwegian-analyzer,`norwegian`>>,
 <<persian-analyzer,`persian`>>,
 <<portuguese-analyzer,`portuguese`>>,
@ -56,7 +57,7 @@ with the `keywords` set to the value of the `stem_exclusion` parameter.
 The following analyzers support setting custom `stem_exclusion` list:
 `arabic`, `armenian`, `basque`, `catalan`, `bulgarian`, `catalan`,
 `czech`, `finnish`, `dutch`, `english`, `finnish`, `french`, `galician`,
-`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
+`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `latvian`, `norwegian`,
 `portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `turkish`.

 ==== Reimplementing language analyzers
@ -1047,6 +1048,50 @@ The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.

+[[latvian-analyzer]]
+===== `latvian` analyzer
+
+The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows:
+
+[source,js]
+----------------------------------------------------
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "latvian_stop": {
+          "type":       "stop",
+          "stopwords":  "_latvian_" <1>
+        },
+        "latvian_keywords": {
+          "type":       "keyword_marker",
+          "keywords":   [] <2>
+        },
+        "latvian_stemmer": {
+          "type":       "stemmer",
+          "language":   "latvian"
+        }
+      },
+      "analyzer": {
+        "latvian": {
+          "tokenizer":  "standard",
+          "filter": [
+            "lowercase",
+            "latvian_stop",
+            "latvian_keywords",
+            "latvian_stemmer"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+<1> The default stopwords can be overridden with the `stopwords`
+    or `stopwords_path` parameters.
+<2> This filter should be removed unless there are words which should
+    be excluded from stemming.
+
 [[norwegian-analyzer]]
 ===== `norwegian` analyzer

--- a/src/main/java/org/elasticsearch/index/analysis/Analysis.java
+++ b/src/main/java/org/elasticsearch/index/analysis/Analysis.java
@ -46,6 +46,7 @@ import org.apache.lucene.analysis.hu.HungarianAnalyzer;
 import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.apache.lucene.analysis.id.IndonesianAnalyzer;
 import org.apache.lucene.analysis.it.ItalianAnalyzer;
+import org.apache.lucene.analysis.lv.LatvianAnalyzer;
 import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.no.NorwegianAnalyzer;
 import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
@ -139,6 +140,7 @@ public class Analysis {
            .put("_indonesian_", IndonesianAnalyzer.getDefaultStopSet())
            .put("_irish_", IrishAnalyzer.getDefaultStopSet())
            .put("_italian_", ItalianAnalyzer.getDefaultStopSet())
+            .put("_latvian_", LatvianAnalyzer.getDefaultStopSet())
            .put("_norwegian_", NorwegianAnalyzer.getDefaultStopSet())
            .put("_persian_", PersianAnalyzer.getDefaultStopSet())
            .put("_portuguese_", PortugueseAnalyzer.getDefaultStopSet())