[Analysis] Add missing docs for latvian analysis

2014-09-02 19:22:31 -04:00 · 2014-09-02 19:22:31 -04:00 · 395744b0d2
parent 8d3dd61b21
commit 395744b0d2
2 changed files with 48 additions and 1 deletions
--- a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
+++ b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
@ -25,6 +25,7 @@ following types are supported:
 <<indonesian-analyzer,`indonesian`>>,
 <<irish-analyzer,`irish`>>,
 <<italian-analyzer,`italian`>>,
 <<latvian-analyzer,`latvian`>>,
 <<norwegian-analyzer,`norwegian`>>,
 <<persian-analyzer,`persian`>>,
 <<portuguese-analyzer,`portuguese`>>,
@ -56,7 +57,7 @@ with the `keywords` set to the value of the `stem_exclusion` parameter.
 The following analyzers support setting a custom `stem_exclusion` list:
 `arabic`, `armenian`, `basque`, `bulgarian`, `catalan`,
 `czech`, `dutch`, `english`, `finnish`, `french`, `galician`,
-`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
+`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `latvian`, `norwegian`,
 `portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `turkish`.
 ==== Reimplementing language analyzers
@ -1047,6 +1048,50 @@ The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.
 [[latvian-analyzer]]
 ===== `latvian` analyzer
 The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows:
 [source,js]
 ----------------------------------------------------
 {
  "settings": {
    "analysis": {
      "filter": {
        "latvian_stop": {
          "type":       "stop",
          "stopwords":  "_latvian_" <1>
        },
        "latvian_keywords": {
          "type":       "keyword_marker",
          "keywords":   [] <2>
        },
        "latvian_stemmer": {
          "type":       "stemmer",
          "language":   "latvian"
        }
      },
      "analyzer": {
        "latvian": {
          "tokenizer":  "standard",
          "filter": [
            "lowercase",
            "latvian_stop",
            "latvian_keywords",
            "latvian_stemmer"
          ]
        }
      }
    }
  }
 }
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.
 [[norwegian-analyzer]]
 ===== `norwegian` analyzer
--- a/src/main/java/org/elasticsearch/index/analysis/Analysis.java
+++ b/src/main/java/org/elasticsearch/index/analysis/Analysis.java
@ -46,6 +46,7 @@ import org.apache.lucene.analysis.hu.HungarianAnalyzer;
 import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.apache.lucene.analysis.id.IndonesianAnalyzer;
 import org.apache.lucene.analysis.it.ItalianAnalyzer;
 import org.apache.lucene.analysis.lv.LatvianAnalyzer;
 import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.no.NorwegianAnalyzer;
 import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
@ -139,6 +140,7 @@ public class Analysis {
            .put("_indonesian_", IndonesianAnalyzer.getDefaultStopSet())
            .put("_irish_", IrishAnalyzer.getDefaultStopSet())
            .put("_italian_", ItalianAnalyzer.getDefaultStopSet())
            .put("_latvian_", LatvianAnalyzer.getDefaultStopSet())
            .put("_norwegian_", NorwegianAnalyzer.getDefaultStopSet())
            .put("_persian_", PersianAnalyzer.getDefaultStopSet())
            .put("_portuguese_", PortugueseAnalyzer.getDefaultStopSet())