From 85eae8b3f508d7f6fd4736e0904311f7a579515a Mon Sep 17 00:00:00 2001 From: kimchy Date: Thu, 5 May 2011 23:45:59 +0300 Subject: [PATCH] Analysis: Expose light and minimal language token filters, closes #908. --- .../index/analysis/Analysis.java | 22 ++- .../index/analysis/AnalysisModule.java | 33 ++-- .../index/analysis/AnalysisService.java | 4 + .../analysis/ArabicAnalyzerProvider.java | 8 +- .../analysis/ArmenianAnalyzerProvider.java | 48 ++++++ .../analysis/BasqueAnalyzerProvider.java | 48 ++++++ .../analysis/BrazilianAnalyzerProvider.java | 18 +-- .../analysis/BulgarianAnalyzerProvider.java | 47 ++++++ .../analysis/CatalanAnalyzerProvider.java | 48 ++++++ .../index/analysis/CzechAnalyzerProvider.java | 9 +- .../analysis/DanishAnalyzerProvider.java | 48 ++++++ .../index/analysis/DutchAnalyzerProvider.java | 19 +-- .../analysis/EnglishAnalyzerProvider.java | 48 ++++++ .../analysis/FinnishAnalyzerProvider.java | 48 ++++++ .../analysis/FrenchAnalyzerProvider.java | 19 +-- .../analysis/GalicianAnalyzerProvider.java | 48 ++++++ .../analysis/GermanAnalyzerProvider.java | 19 +-- .../index/analysis/GreekAnalyzerProvider.java | 7 +- .../index/analysis/HindiAnalyzerProvider.java | 48 ++++++ .../analysis/HungarianAnalyzerProvider.java | 48 ++++++ .../analysis/IndonesianAnalyzerProvider.java | 48 ++++++ .../analysis/ItalianAnalyzerProvider.java | 48 ++++++ .../analysis/NorwegianAnalyzerProvider.java | 48 ++++++ .../analysis/PersianAnalyzerProvider.java | 7 +- .../analysis/PortugueseAnalyzerProvider.java | 48 ++++++ .../analysis/RomanianAnalyzerProvider.java | 48 ++++++ .../analysis/RussianAnalyzerProvider.java | 17 +- .../analysis/SnowballTokenFilterFactory.java | 3 +- .../analysis/SpanishAnalyzerProvider.java | 48 ++++++ .../analysis/StemmerTokenFilterFactory.java | 145 ++++++++++++++++++ .../analysis/SwedishAnalyzerProvider.java | 48 ++++++ .../analysis/TurkishAnalyzerProvider.java | 48 ++++++ .../analysis/IndicesAnalysisService.java | 128 +++++----------- 33 files 
changed, 1125 insertions(+), 196 deletions(-) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java create mode 100644 
modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java index 71dea023aeb..7e789d31422 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java @@ -42,6 +42,23 @@ public class Analysis { return value != null && "_none_".equals(value); } + /** Resolves the "stem_exclusion" setting: the string "_none_" yields an empty set, any other string is split as a comma-delimited list, an array value is copied as-is, and when the setting is absent defaultStemExclusion is returned unchanged. */ public static Set parseStemExclusion(Settings settings, Set defaultStemExclusion) { + String value = settings.get("stem_exclusion"); /* single-string form of the setting */ + if (value != null) { + if ("_none_".equals(value)) { + return ImmutableSet.of(); + } else { + return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value)); + } + } + String[] stopWords = settings.getAsArray("stem_exclusion", null); /* array form; despite its name this local holds the stem-exclusion words */ + if (stopWords != null) { + return ImmutableSet.copyOf(Iterators.forArray(stopWords)); + } else { + return defaultStemExclusion; + } + } + public static Set parseStopWords(Settings settings, Set defaultStopWords) { String value = settings.get("stopwords"); if (value != null) { @@ -63,14 +80,15 @@ public class Analysis { * Fetches a list of words from the specified settings file. The list should either be available at the key * specified by settingsPrefix or in a file specified by settingsPrefix + _path. * - * @throws ElasticSearchIllegalArgumentException If the word list cannot be found at either key. + * @throws ElasticSearchIllegalArgumentException + * If the word list cannot be found at either key. 
*/ public static Set getWordList(Settings settings, String settingPrefix) { String wordListPath = settings.get(settingPrefix + "_path", null); if (wordListPath == null) { String[] explicitWordList = settings.getAsArray(settingPrefix, null); - if(explicitWordList == null) { + if (explicitWordList == null) { String message = String.format("%s or %s_path must be provided.", settingPrefix, settingPrefix); throw new ElasticSearchIllegalArgumentException(message); } else { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java index 1c9c83fe619..e36af0de311 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java @@ -311,7 +311,6 @@ public class AnalysisModule extends AbstractModule { @Override public void processCharFilters(CharFiltersBindings charFiltersBindings) { charFiltersBindings.processCharFilter("html_strip", HtmlStripCharFilterFactory.class); - charFiltersBindings.processCharFilter("htmlStrip", HtmlStripCharFilterFactory.class); } @Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { @@ -320,7 +319,6 @@ public class AnalysisModule extends AbstractModule { tokenFiltersBindings.processTokenFilter("asciifolding", ASCIIFoldingTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("length", LengthTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("lowercase", LowerCaseTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("porterStem", PorterStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("porter_stem", PorterStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("standard", StandardTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("nGram", 
NGramTokenFilterFactory.class); @@ -329,18 +327,14 @@ public class AnalysisModule extends AbstractModule { tokenFiltersBindings.processTokenFilter("edge_ngram", EdgeNGramTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("shingle", ShingleTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("phonetic", PhoneticTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("dictionaryDecompounder", DictionaryCompoundWordTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("dictionary_decompounder", DictionaryCompoundWordTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("hyphenationDecompounder", HyphenationCompoundWordTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("hypennation_decompounder", HyphenationCompoundWordTokenFilterFactory.class); } @Override public void processTokenizers(TokenizersBindings tokenizersBindings) { tokenizersBindings.processTokenizer("standard", StandardTokenizerFactory.class); tokenizersBindings.processTokenizer("uax_url_email", UAX29URLEmailTokenizerFactory.class); - tokenizersBindings.processTokenizer("uaxUrlEmail", UAX29URLEmailTokenizerFactory.class); tokenizersBindings.processTokenizer("path_hierarchy", PathHierarchyTokenizerFactory.class); - tokenizersBindings.processTokenizer("pathHierarchy", PathHierarchyTokenizerFactory.class); tokenizersBindings.processTokenizer("keyword", KeywordTokenizerFactory.class); tokenizersBindings.processTokenizer("letter", LetterTokenizerFactory.class); tokenizersBindings.processTokenizer("lowercase", LowerCaseTokenizerFactory.class); @@ -356,7 +350,6 @@ public class AnalysisModule extends AbstractModule { analyzersBindings.processAnalyzer("default", StandardAnalyzerProvider.class); analyzersBindings.processAnalyzer("standard", StandardAnalyzerProvider.class); analyzersBindings.processAnalyzer("standard_html_strip", StandardHtmlStripAnalyzerProvider.class); - analyzersBindings.processAnalyzer("standardHtmlStrip", 
StandardHtmlStripAnalyzerProvider.class); analyzersBindings.processAnalyzer("simple", SimpleAnalyzerProvider.class); analyzersBindings.processAnalyzer("stop", StopAnalyzerProvider.class); analyzersBindings.processAnalyzer("whitespace", WhitespaceAnalyzerProvider.class); @@ -367,20 +360,14 @@ public class AnalysisModule extends AbstractModule { private static class ExtendedProcessor extends AnalysisBinderProcessor { @Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { tokenFiltersBindings.processTokenFilter("snowball", SnowballTokenFilterFactory.class); + tokenFiltersBindings.processTokenFilter("stemmer", StemmerTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("arabicStem", ArabicStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("arabic_stem", ArabicStemTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("brazilianStem", BrazilianStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("brazilian_stem", BrazilianStemTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("czechStem", CzechStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("czech_stem", CzechStemTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("dutchStem", DutchStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("dutch_stem", DutchStemTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("frenchStem", FrenchStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("french_stem", FrenchStemTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("germanStem", GermanStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("german_stem", GermanStemTokenFilterFactory.class); - tokenFiltersBindings.processTokenFilter("russianStem", RussianStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("russian_stem", RussianStemTokenFilterFactory.class); } @@ -392,16 
+379,34 @@ public class AnalysisModule extends AbstractModule { analyzersBindings.processAnalyzer("snowball", SnowballAnalyzerProvider.class); analyzersBindings.processAnalyzer("arabic", ArabicAnalyzerProvider.class); + analyzersBindings.processAnalyzer("armenian", ArmenianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("basque", BasqueAnalyzerProvider.class); analyzersBindings.processAnalyzer("brazilian", BrazilianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("bulgarian", BulgarianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("catalan", CatalanAnalyzerProvider.class); analyzersBindings.processAnalyzer("chinese", ChineseAnalyzerProvider.class); analyzersBindings.processAnalyzer("cjk", CjkAnalyzerProvider.class); analyzersBindings.processAnalyzer("czech", CzechAnalyzerProvider.class); + analyzersBindings.processAnalyzer("danish", DanishAnalyzerProvider.class); analyzersBindings.processAnalyzer("dutch", DutchAnalyzerProvider.class); + analyzersBindings.processAnalyzer("english", EnglishAnalyzerProvider.class); + analyzersBindings.processAnalyzer("finnish", FinnishAnalyzerProvider.class); analyzersBindings.processAnalyzer("french", FrenchAnalyzerProvider.class); + analyzersBindings.processAnalyzer("galician", GalicianAnalyzerProvider.class); analyzersBindings.processAnalyzer("german", GermanAnalyzerProvider.class); analyzersBindings.processAnalyzer("greek", GreekAnalyzerProvider.class); + analyzersBindings.processAnalyzer("hindi", HindiAnalyzerProvider.class); + analyzersBindings.processAnalyzer("hungarian", HungarianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("indonesian", IndonesianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("italian", ItalianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("norwegian", NorwegianAnalyzerProvider.class); analyzersBindings.processAnalyzer("persian", PersianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("portuguese", 
PortugueseAnalyzerProvider.class); + analyzersBindings.processAnalyzer("romanian", RomanianAnalyzerProvider.class); analyzersBindings.processAnalyzer("russian", RussianAnalyzerProvider.class); + analyzersBindings.processAnalyzer("spanish", SpanishAnalyzerProvider.class); + analyzersBindings.processAnalyzer("swedish", SwedishAnalyzerProvider.class); + analyzersBindings.processAnalyzer("turkish", TurkishAnalyzerProvider.class); analyzersBindings.processAnalyzer("thai", ThaiAnalyzerProvider.class); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java index 6389e1354a8..7b75fa43d27 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java @@ -89,6 +89,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) { NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get()); analyzers.put(analyzerFactory.name(), analyzer); + analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer); String strAliases = indexSettings.get("index.analysis.analyzer." 
+ analyzerFactory.name() + ".alias"); if (strAliases != null) { for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { @@ -116,6 +117,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings); tokenizers.put(tokenizerName, tokenizerFactory); + tokenizers.put(Strings.toCamelCase(tokenizerName), tokenizerFactory); } } this.tokenizers = ImmutableMap.copyOf(tokenizers); @@ -134,6 +136,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable CharFilterFactory tokenFilterFactory = charFilterFactoryFactory.create(charFilterName, charFilterSettings); charFilters.put(charFilterName, tokenFilterFactory); + charFilters.put(Strings.toCamelCase(charFilterName), tokenFilterFactory); } } this.charFilters = ImmutableMap.copyOf(charFilters); @@ -152,6 +155,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable TokenFilterFactory tokenFilterFactory = tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings); tokenFilters.put(tokenFilterName, tokenFilterFactory); + tokenFilters.put(Strings.toCamelCase(tokenFilterName), tokenFilterFactory); } } this.tokenFilters = ImmutableMap.copyOf(tokenFilters); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java index 975071616e3..5b7f4e83748 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.elasticsearch.common.inject.Inject; import 
org.elasticsearch.common.inject.assistedinject.Assisted; @@ -26,8 +27,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ @@ -37,8 +36,9 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()); - arabicAnalyzer = new ArabicAnalyzer(version, stopWords); + arabicAnalyzer = new ArabicAnalyzer(version, + Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public ArabicAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java new file mode 100644 index 00000000000..7a49c2bea53 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Builds one {@link ArmenianAnalyzer} at construction from the analyzer settings and returns that instance from get(); stop words default to ArmenianAnalyzer.getDefaultStopSet() (not the Arabic set) and stem exclusions default to an empty set. + * @author kimchy (shay.banon) + */ +public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final ArmenianAnalyzer analyzer; + + @Inject public ArmenianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new ArmenianAnalyzer(version, + Analysis.parseStopWords(settings, ArmenianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public ArmenianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java new file mode 100644 index 00000000000..66557894667 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Builds one {@link BasqueAnalyzer} at construction from the analyzer settings and returns that instance from get(); stop words default to BasqueAnalyzer.getDefaultStopSet() (not the Arabic set) and stem exclusions default to an empty set. + * @author kimchy (shay.banon) + */ +public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final BasqueAnalyzer analyzer; + + @Inject public BasqueAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new BasqueAnalyzer(version, + Analysis.parseStopWords(settings, BasqueAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public BasqueAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java index 149ec11c3b8..d16c33ff64a 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java @@ -19,36 +19,26 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.br.BrazilianAnalyzer; -import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private final Set stemExclusion; - private final BrazilianAnalyzer analyzer; @Inject public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - Set stopWords = Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet()); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.stemExclusion = ImmutableSet.of(); - } - analyzer = new BrazilianAnalyzer(version, stopWords, this.stemExclusion); + analyzer = new BrazilianAnalyzer(version, + Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public BrazilianAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java new file mode 100644 index 00000000000..78b63d143ad --- /dev/null +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java @@ -0,0 +1,47 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Builds one {@link BulgarianAnalyzer} at construction from the analyzer settings and returns that instance from get(); stop words default to BulgarianAnalyzer.getDefaultStopSet() and stem exclusions default to an empty set. + * @author kimchy (shay.banon) + */ +public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final BulgarianAnalyzer analyzer; + + @Inject public BulgarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new BulgarianAnalyzer(version, + Analysis.parseStopWords(settings, BulgarianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public BulgarianAnalyzer get() { + return this.analyzer; + } +} \ No 
newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java new file mode 100644 index 00000000000..51f485d41c9 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Builds one {@link CatalanAnalyzer} at construction from the analyzer settings and returns that instance from get(); stop words default to CatalanAnalyzer.getDefaultStopSet() (not the Arabic set) and stem exclusions default to an empty set. + * @author kimchy (shay.banon) + */ +public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final CatalanAnalyzer analyzer; + + @Inject public CatalanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new CatalanAnalyzer(version, + Analysis.parseStopWords(settings, CatalanAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public CatalanAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java index 92821189d15..6a9d0bc7ed0 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; @@ -26,8 +27,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import 
org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ @@ -37,9 +36,9 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet()); - - analyzer = new CzechAnalyzer(version, stopWords); + analyzer = new CzechAnalyzer(version, + Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public CzechAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java new file mode 100644 index 00000000000..cc243698f91 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Builds one {@link DanishAnalyzer} at construction from the analyzer settings and returns that instance from get(); stop words default to DanishAnalyzer.getDefaultStopSet() (not the Arabic set) and stem exclusions default to an empty set. + * @author kimchy (shay.banon) + */ +public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final DanishAnalyzer analyzer; + + @Inject public DanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new DanishAnalyzer(version, + Analysis.parseStopWords(settings, DanishAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public DanishAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java index 0e815a8d6fc..64560747f0a 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java @@ -19,37 +19,26 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.nl.DutchAnalyzer; -import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; 
import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private final Set stemExclusion; - private final DutchAnalyzer analyzer; @Inject public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - Set stopWords = Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet()); - - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.stemExclusion = ImmutableSet.of(); - } - analyzer = new DutchAnalyzer(version, stopWords, this.stemExclusion); + analyzer = new DutchAnalyzer(version, + Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public DutchAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java new file mode 100644 index 00000000000..69ec8baf068 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides an {@link EnglishAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final EnglishAnalyzer analyzer; + + @Inject public EnglishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new EnglishAnalyzer(version, + Analysis.parseStopWords(settings, EnglishAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public EnglishAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java new file mode 100644 index 00000000000..479982cfcd6 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link FinnishAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final FinnishAnalyzer analyzer; + + @Inject public FinnishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new FinnishAnalyzer(version, + Analysis.parseStopWords(settings, FinnishAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public FinnishAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java index 1bcb958d72d..d7476ab8f3e 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java @@ -19,37 +19,26 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fr.FrenchAnalyzer; -import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private final Set stemExclusion; - private final FrenchAnalyzer analyzer; @Inject public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - Set stopWords = Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet()); - - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.stemExclusion = ImmutableSet.of(); - } - analyzer = new FrenchAnalyzer(version, stopWords, this.stemExclusion); + analyzer = new FrenchAnalyzer(version, + Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public FrenchAnalyzer get() { diff --git 
a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java new file mode 100644 index 00000000000..93001b415c6 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link GalicianAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final GalicianAnalyzer analyzer; + + @Inject public GalicianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new GalicianAnalyzer(version, + Analysis.parseStopWords(settings, GalicianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public GalicianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java index 7a163361b97..548f1287fc5 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java @@ -19,37 +19,26 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.de.GermanAnalyzer; -import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private final Set stemExclusion; - private final GermanAnalyzer analyzer; @Inject public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - Set stopWords = Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet()); - - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.stemExclusion = ImmutableSet.of(); - } - analyzer = new GermanAnalyzer(version, stopWords, this.stemExclusion); + analyzer = new GermanAnalyzer(version, + Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public GermanAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java index e3bf63402a2..6d3ca226570 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java @@ -26,8 +26,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ @@ -37,9 +35,8 @@ public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(settings, 
GreekAnalyzer.getDefaultStopSet()); - - analyzer = new GreekAnalyzer(version, stopWords); + analyzer = new GreekAnalyzer(version, + Analysis.parseStopWords(settings, GreekAnalyzer.getDefaultStopSet())); } @Override public GreekAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java new file mode 100644 index 00000000000..fd2cef85962 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link HindiAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final HindiAnalyzer analyzer; + + @Inject public HindiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new HindiAnalyzer(version, + Analysis.parseStopWords(settings, HindiAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public HindiAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java new file mode 100644 index 00000000000..b8747b79638 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link HungarianAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final HungarianAnalyzer analyzer; + + @Inject public HungarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new HungarianAnalyzer(version, + Analysis.parseStopWords(settings, HungarianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public HungarianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java new file mode 100644 index 00000000000..5a8861e02c3 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search 
and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides an {@link IndonesianAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final IndonesianAnalyzer analyzer; + + @Inject public IndonesianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new IndonesianAnalyzer(version, + Analysis.parseStopWords(settings, IndonesianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public IndonesianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git 
a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java new file mode 100644 index 00000000000..52305820ba9 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides an {@link ItalianAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final ItalianAnalyzer analyzer; + + @Inject public ItalianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new ItalianAnalyzer(version, + Analysis.parseStopWords(settings, ItalianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public ItalianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java new file mode 100644 index 00000000000..3fa4c10a850 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link NorwegianAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final NorwegianAnalyzer analyzer; + + @Inject public NorwegianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new NorwegianAnalyzer(version, + Analysis.parseStopWords(settings, NorwegianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public NorwegianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java index 52a3d7d350d..87774ef8488 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java @@ -26,8 +26,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ @@ -37,9 +35,8 @@ public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet()); - - analyzer = new PersianAnalyzer(version, stopWords); + analyzer = new PersianAnalyzer(version, + Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet())); } @Override public PersianAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java new file mode 100644 index 00000000000..2d7b79aa3c7 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link PortugueseAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final PortugueseAnalyzer analyzer; + + @Inject public PortugueseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new PortugueseAnalyzer(version, + Analysis.parseStopWords(settings, PortugueseAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public PortugueseAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java new file mode 100644 index 00000000000..e764b866307 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** Provides a {@link RomanianAnalyzer} configured from the stop-word and stem-exclusion analyzer settings. + * @author kimchy (shay.banon) + */ +public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final RomanianAnalyzer analyzer; + + @Inject public RomanianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + analyzer = new RomanianAnalyzer(version, + Analysis.parseStopWords(settings, RomanianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); + } + + @Override public RomanianAnalyzer get() { + return this.analyzer; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java index bf839d0f786..7b7094716e6 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java @@ -19,16 +19,14 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ru.RussianAnalyzer; -import org.elasticsearch.common.collect.ImmutableSet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * @author kimchy (shay.banon) */ @@ -38,16 +36,9 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(settings, ImmutableSet.of()); - if (!stopWords.isEmpty()) { - analyzer = new RussianAnalyzer(version, stopWords); - } else { - analyzer = new RussianAnalyzer(version); - } - } + analyzer = new RussianAnalyzer(version, + Analysis.parseStopWords(settings, RussianAnalyzer.getDefaultStopSet()), + Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); } @Override public RussianAnalyzer get() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java index a2481e8bd47..313b1442c15 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import 
org.elasticsearch.common.settings.Settings; @@ -39,7 +40,7 @@ public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory { @Inject public SnowballTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - this.language = settings.get("language", settings.get("name", "English")); + this.language = Strings.capitalize(settings.get("language", settings.get("name", "English"))); } @Override public TokenStream create(TokenStream tokenStream) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java new file mode 100644 index 00000000000..0b53abe7667 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
+import org.apache.lucene.analysis.es.SpanishAnalyzer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/** Provider that builds one {@link SpanishAnalyzer} from the analyzer settings and returns it from {@link #get()}.
+ * @author kimchy (shay.banon)
+ */
+public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<SpanishAnalyzer> {
+
+    private final SpanishAnalyzer analyzer;
+
+    @Inject public SpanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        analyzer = new SpanishAnalyzer(version,
+                Analysis.parseStopWords(settings, SpanishAnalyzer.getDefaultStopSet()), // Spanish set is passed as the default (was the Arabic set)
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); // empty set is passed as the default stem-exclusion list
+    }
+
+    @Override public SpanishAnalyzer get() { // hands out the instance built in the constructor
+        return this.analyzer;
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java
new file mode 100644
index 00000000000..c402ca201f3
--- /dev/null
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanLightStemFilter;
+import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
+import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+import org.apache.lucene.analysis.es.SpanishLightStemFilter;
+import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
+import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
+import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
+import org.apache.lucene.analysis.hi.HindiStemFilter;
+import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
+import org.apache.lucene.analysis.id.IndonesianStemFilter;
+import org.apache.lucene.analysis.it.ItalianLightStemFilter;
+import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
+import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
+import org.apache.lucene.analysis.pt.PortugueseStemFilter;
+import org.apache.lucene.analysis.ru.RussianLightStemFilter;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+import org.tartarus.snowball.ext.*;
+
+/** Token filter factory that picks a stemming filter from the "language" setting (falling back to "name", then "porter");
+ * names are matched case-insensitively and any unmatched name is handed to {@link SnowballFilter} as a stemmer name. */
+public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
+
+    private final String language; // stored capitalized; presumably so the by-name SnowballFilter fallback gets e.g. "English" - TODO confirm
+
+    @Inject public StemmerTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
+    }
+
+    @Override public TokenStream create(TokenStream tokenStream) { // wraps tokenStream in the filter selected by `language`
+        if ("armenian".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new ArmenianStemmer());
+        } else if ("basque".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new BasqueStemmer());
+        } else if ("catalan".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new CatalanStemmer());
+        } else if ("danish".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new DanishStemmer());
+        } else if ("dutch".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new DutchStemmer());
+        } else if ("english".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new EnglishStemmer());
+        } else if ("finnish".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new FinnishStemmer());
+        } else if ("french".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new FrenchStemmer());
+        } else if ("german".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new GermanStemmer());
+        } else if ("german2".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new German2Stemmer());
+        } else if ("hungarian".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new HungarianStemmer());
+        } else if ("italian".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new ItalianStemmer());
+        } else if ("kp".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new KpStemmer());
+        } else if ("lovins".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new LovinsStemmer());
+        } else if ("norwegian".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new NorwegianStemmer());
+        } else if ("porter".equalsIgnoreCase(language)) {
+            return new PorterStemFilter(tokenStream);
+        } else if ("porter2".equalsIgnoreCase(language)) { // NOTE(review): Porter2 is usually the Snowball English stemmer - confirm PorterStemmer is intended
+            return new SnowballFilter(tokenStream, new PorterStemmer());
+        } else if ("portuguese".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new PortugueseStemmer());
+        } else if ("romanian".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new RomanianStemmer());
+        } else if ("russian".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new RussianStemmer());
+        } else if ("spanish".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new SpanishStemmer());
+        } else if ("swedish".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new SwedishStemmer());
+        } else if ("turkish".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new TurkishStemmer());
+        } else if ("minimal_english".equalsIgnoreCase(language) || "minimalEnglish".equalsIgnoreCase(language)) {
+            return new EnglishMinimalStemFilter(tokenStream);
+        } else if ("possessive_english".equalsIgnoreCase(language) || "possessiveEnglish".equalsIgnoreCase(language)) {
+            return new EnglishPossessiveFilter(tokenStream);
+        } else if ("light_finnish".equalsIgnoreCase(language) || "lightFinnish".equalsIgnoreCase(language) || "light_finish".equalsIgnoreCase(language) || "lightFinish".equalsIgnoreCase(language)) { // misspelled "finish" keys kept for compatibility
+            return new FinnishLightStemFilter(tokenStream);
+        } else if ("light_french".equalsIgnoreCase(language) || "lightFrench".equalsIgnoreCase(language)) {
+            return new FrenchLightStemFilter(tokenStream);
+        } else if ("minimal_french".equalsIgnoreCase(language) || "minimalFrench".equalsIgnoreCase(language)) {
+            return new FrenchMinimalStemFilter(tokenStream);
+        } else if ("light_german".equalsIgnoreCase(language) || "lightGerman".equalsIgnoreCase(language)) {
+            return new GermanLightStemFilter(tokenStream);
+        } else if ("minimal_german".equalsIgnoreCase(language) || "minimalGerman".equalsIgnoreCase(language)) {
+            return new GermanMinimalStemFilter(tokenStream);
+        } else if ("hindi".equalsIgnoreCase(language)) {
+            return new HindiStemFilter(tokenStream);
+        } else if ("light_hungarian".equalsIgnoreCase(language) || "lightHungarian".equalsIgnoreCase(language)) {
+            return new HungarianLightStemFilter(tokenStream);
+        } else if ("indonesian".equalsIgnoreCase(language)) {
+            return new IndonesianStemFilter(tokenStream);
+        } else if ("light_italian".equalsIgnoreCase(language) || "lightItalian".equalsIgnoreCase(language)) {
+            return new ItalianLightStemFilter(tokenStream);
+        } else if ("light_portuguese".equalsIgnoreCase(language) || "lightPortuguese".equalsIgnoreCase(language)) {
+            return new PortugueseLightStemFilter(tokenStream);
+        } else if ("minimal_portuguese".equalsIgnoreCase(language) || "minimalPortuguese".equalsIgnoreCase(language)) {
+            return new PortugueseMinimalStemFilter(tokenStream);
+        } else if ("portuguese".equalsIgnoreCase(language)) { // NOTE(review): unreachable - "portuguese" is already matched by the Snowball branch above; needs its own key
+            return new PortugueseStemFilter(tokenStream);
+        } else if ("light_russian".equalsIgnoreCase(language) || "lightRussian".equalsIgnoreCase(language)) {
+            return new RussianLightStemFilter(tokenStream);
+        } else if ("light_spanish".equalsIgnoreCase(language) || "lightSpanish".equalsIgnoreCase(language)) {
+            return new SpanishLightStemFilter(tokenStream);
+        } else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) {
+            return new org.apache.lucene.analysis.sv.SwedishLightStemFilter(tokenStream); // was SpanishLightStemFilter; fully qualified to avoid adding an import line
+        }
+        return new SnowballFilter(tokenStream, language); // no explicit match: let SnowballFilter resolve the stemmer from the name
+    }
+
+}
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java
new file mode 100644
index 00000000000..a27d58721eb
---
/dev/null
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
+import org.apache.lucene.analysis.sv.SwedishAnalyzer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/** Provider that builds one {@link SwedishAnalyzer} from the analyzer settings and returns it from {@link #get()}.
+ * @author kimchy (shay.banon)
+ */
+public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<SwedishAnalyzer> {
+
+    private final SwedishAnalyzer analyzer;
+
+    @Inject public SwedishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        analyzer = new SwedishAnalyzer(version,
+                Analysis.parseStopWords(settings, SwedishAnalyzer.getDefaultStopSet()), // Swedish set is passed as the default (was the Arabic set)
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); // empty set is passed as the default stem-exclusion list
+    }
+
+    @Override public SwedishAnalyzer get() { // hands out the instance built in the constructor
+        return this.analyzer;
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java
new file mode 100644
index 00000000000..d033a616be7
--- /dev/null
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
+import org.apache.lucene.analysis.tr.TurkishAnalyzer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/** Provider that builds one {@link TurkishAnalyzer} from the analyzer settings and returns it from {@link #get()}.
+ * @author kimchy (shay.banon)
+ */
+public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<TurkishAnalyzer> {
+
+    private final TurkishAnalyzer analyzer;
+
+    @Inject public TurkishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        analyzer = new TurkishAnalyzer(version,
+                Analysis.parseStopWords(settings, TurkishAnalyzer.getDefaultStopSet()), // Turkish set is passed as the default (was the Arabic set)
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); // empty set is passed as the default stem-exclusion list
+    }
+
+    @Override public TurkishAnalyzer get() { // hands out the instance built in the constructor
+        return this.analyzer;
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java b/modules/elasticsearch/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
index 9d25b4a3773..63a4fdd421d 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
@@ -22,17 +22,30 @@ package org.elasticsearch.indices.analysis;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
+import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
+import
org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.cn.ChineseAnalyzer; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechStemFilter; +import org.apache.lucene.analysis.da.DanishAnalyzer; import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.de.GermanStemFilter; import org.apache.lucene.analysis.el.GreekAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.fr.FrenchStemFilter; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; @@ -40,8 +53,11 @@ import org.apache.lucene.analysis.ngram.NGramTokenFilter; import org.apache.lucene.analysis.ngram.NGramTokenizer; import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.nl.DutchStemFilter; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; import org.apache.lucene.analysis.path.PathHierarchyTokenizer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; import org.apache.lucene.analysis.reverse.ReverseStringFilter; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.ru.RussianStemFilter; import 
org.apache.lucene.analysis.shingle.ShingleFilter; @@ -51,7 +67,9 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; +import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.Lucene; @@ -98,18 +116,36 @@ public class IndicesAnalysisService extends AbstractComponent { analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET))); analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET))); analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION))); - analyzerProviderFactories.put("standardHtmlStrip", new PreBuiltAnalyzerProviderFactory("standardHtmlStrip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("basque", new PreBuiltAnalyzerProviderFactory("basque", 
AnalyzerScope.INDICES, new BasqueAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("bulgarian", new PreBuiltAnalyzerProviderFactory("bulgarian", AnalyzerScope.INDICES, new BulgarianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("catalan", new PreBuiltAnalyzerProviderFactory("catalan", AnalyzerScope.INDICES, new CatalanAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer())); analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new ChineseAnalyzer())); analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("danish", new PreBuiltAnalyzerProviderFactory("danish", AnalyzerScope.INDICES, new DanishAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("english", new PreBuiltAnalyzerProviderFactory("english", AnalyzerScope.INDICES, new EnglishAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("finnish", new PreBuiltAnalyzerProviderFactory("finnish", AnalyzerScope.INDICES, new FinnishAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("galician", new PreBuiltAnalyzerProviderFactory("galician", AnalyzerScope.INDICES, new GalicianAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("german", new 
PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("portuguese", new PreBuiltAnalyzerProviderFactory("portuguese", AnalyzerScope.INDICES, new PortugueseAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("romanian", new PreBuiltAnalyzerProviderFactory("romanian", AnalyzerScope.INDICES, new RomanianAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("spanish", new PreBuiltAnalyzerProviderFactory("spanish", AnalyzerScope.INDICES, new SpanishAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("swedish", new PreBuiltAnalyzerProviderFactory("swedish", 
AnalyzerScope.INDICES, new SwedishAnalyzer(Lucene.ANALYZER_VERSION))); + analyzerProviderFactories.put("turkish", new PreBuiltAnalyzerProviderFactory("turkish", AnalyzerScope.INDICES, new TurkishAnalyzer(Lucene.ANALYZER_VERSION))); analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION))); // Base Tokenizers @@ -133,16 +169,6 @@ public class IndicesAnalysisService extends AbstractComponent { } })); - tokenizerFactories.put("uaxUrlEmail", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { - @Override public String name() { - return "uaxUrlEmail"; - } - - @Override public Tokenizer create(Reader reader) { - return new UAX29URLEmailTokenizer(reader); - } - })); - tokenizerFactories.put("path_hierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { @Override public String name() { return "path_hierarchy"; @@ -153,16 +179,6 @@ public class IndicesAnalysisService extends AbstractComponent { } })); - tokenizerFactories.put("pathHierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { - @Override public String name() { - return "pathHierarchy"; - } - - @Override public Tokenizer create(Reader reader) { - return new PathHierarchyTokenizer(reader); - } - })); - tokenizerFactories.put("keyword", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { @Override public String name() { return "keyword"; @@ -294,16 +310,6 @@ public class IndicesAnalysisService extends AbstractComponent { } })); - tokenFilterFactories.put("porterStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "porterStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return new PorterStemFilter(tokenStream); - } - })); - tokenFilterFactories.put("porter_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "porter_stem"; @@ 
-384,13 +390,13 @@ public class IndicesAnalysisService extends AbstractComponent { return new SnowballFilter(tokenStream, "English"); } })); - tokenFilterFactories.put("arabicStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + tokenFilterFactories.put("stemmer", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { - return "arabicStem"; + return "stemmer"; } @Override public TokenStream create(TokenStream tokenStream) { - return new ArabicStemFilter(tokenStream); + return new PorterStemFilter(tokenStream); } })); tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @@ -402,15 +408,6 @@ public class IndicesAnalysisService extends AbstractComponent { return new ArabicStemFilter(tokenStream); } })); - tokenFilterFactories.put("brazilianStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "brazilianStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return new BrazilianStemFilter(tokenStream); - } - })); tokenFilterFactories.put("brazilian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "brazilian_stem"; @@ -420,15 +417,6 @@ public class IndicesAnalysisService extends AbstractComponent { return new BrazilianStemFilter(tokenStream); } })); - tokenFilterFactories.put("czechStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "czechStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return new CzechStemFilter(tokenStream); - } - })); tokenFilterFactories.put("czech_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "czech_stem"; @@ -438,15 +426,6 @@ public class IndicesAnalysisService extends AbstractComponent { return new CzechStemFilter(tokenStream); } 
})); - tokenFilterFactories.put("dutchStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "dutchStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return new DutchStemFilter(tokenStream); - } - })); tokenFilterFactories.put("dutch_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "dutch_stem"; @@ -456,15 +435,6 @@ public class IndicesAnalysisService extends AbstractComponent { return new DutchStemFilter(tokenStream); } })); - tokenFilterFactories.put("frenchStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "frenchStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return new FrenchStemFilter(tokenStream); - } - })); tokenFilterFactories.put("french_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "french_stem"; @@ -474,15 +444,6 @@ public class IndicesAnalysisService extends AbstractComponent { return new FrenchStemFilter(tokenStream); } })); - tokenFilterFactories.put("germanStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "germanStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return new GermanStemFilter(tokenStream); - } - })); tokenFilterFactories.put("german_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "german_stem"; @@ -492,15 +453,6 @@ public class IndicesAnalysisService extends AbstractComponent { return new GermanStemFilter(tokenStream); } })); - tokenFilterFactories.put("russianStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override public String name() { - return "russianStem"; - } - - @Override public TokenStream create(TokenStream tokenStream) { - return 
new RussianStemFilter(tokenStream); - } - })); tokenFilterFactories.put("russian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "russian_stem";