Analysis: Expose light and minimal language token filters, closes #908.
This commit is contained in:
parent
0f78100b97
commit
85eae8b3f5
|
@ -42,6 +42,23 @@ public class Analysis {
|
|||
return value != null && "_none_".equals(value);
|
||||
}
|
||||
|
||||
public static Set<?> parseStemExclusion(Settings settings, Set<?> defaultStemExclusion) {
|
||||
String value = settings.get("stem_exclusion");
|
||||
if (value != null) {
|
||||
if ("_none_".equals(value)) {
|
||||
return ImmutableSet.of();
|
||||
} else {
|
||||
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
|
||||
}
|
||||
}
|
||||
String[] stopWords = settings.getAsArray("stem_exclusion", null);
|
||||
if (stopWords != null) {
|
||||
return ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||
} else {
|
||||
return defaultStemExclusion;
|
||||
}
|
||||
}
|
||||
|
||||
public static Set<?> parseStopWords(Settings settings, Set<?> defaultStopWords) {
|
||||
String value = settings.get("stopwords");
|
||||
if (value != null) {
|
||||
|
@ -63,14 +80,15 @@ public class Analysis {
|
|||
* Fetches a list of words from the specified settings file. The list should either be available at the key
|
||||
* specified by settingsPrefix or in a file specified by settingsPrefix + _path.
|
||||
*
|
||||
* @throws ElasticSearchIllegalArgumentException If the word list cannot be found at either key.
|
||||
* @throws ElasticSearchIllegalArgumentException
|
||||
* If the word list cannot be found at either key.
|
||||
*/
|
||||
public static Set<String> getWordList(Settings settings, String settingPrefix) {
|
||||
String wordListPath = settings.get(settingPrefix + "_path", null);
|
||||
|
||||
if (wordListPath == null) {
|
||||
String[] explicitWordList = settings.getAsArray(settingPrefix, null);
|
||||
if(explicitWordList == null) {
|
||||
if (explicitWordList == null) {
|
||||
String message = String.format("%s or %s_path must be provided.", settingPrefix, settingPrefix);
|
||||
throw new ElasticSearchIllegalArgumentException(message);
|
||||
} else {
|
||||
|
|
|
@ -311,7 +311,6 @@ public class AnalysisModule extends AbstractModule {
|
|||
|
||||
@Override public void processCharFilters(CharFiltersBindings charFiltersBindings) {
|
||||
charFiltersBindings.processCharFilter("html_strip", HtmlStripCharFilterFactory.class);
|
||||
charFiltersBindings.processCharFilter("htmlStrip", HtmlStripCharFilterFactory.class);
|
||||
}
|
||||
|
||||
@Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
|
||||
|
@ -320,7 +319,6 @@ public class AnalysisModule extends AbstractModule {
|
|||
tokenFiltersBindings.processTokenFilter("asciifolding", ASCIIFoldingTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("length", LengthTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("lowercase", LowerCaseTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("porterStem", PorterStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("porter_stem", PorterStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("standard", StandardTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("nGram", NGramTokenFilterFactory.class);
|
||||
|
@ -329,18 +327,14 @@ public class AnalysisModule extends AbstractModule {
|
|||
tokenFiltersBindings.processTokenFilter("edge_ngram", EdgeNGramTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("shingle", ShingleTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("phonetic", PhoneticTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("dictionaryDecompounder", DictionaryCompoundWordTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("dictionary_decompounder", DictionaryCompoundWordTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("hyphenationDecompounder", HyphenationCompoundWordTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("hypennation_decompounder", HyphenationCompoundWordTokenFilterFactory.class);
|
||||
}
|
||||
|
||||
@Override public void processTokenizers(TokenizersBindings tokenizersBindings) {
|
||||
tokenizersBindings.processTokenizer("standard", StandardTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("uax_url_email", UAX29URLEmailTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("uaxUrlEmail", UAX29URLEmailTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("path_hierarchy", PathHierarchyTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("pathHierarchy", PathHierarchyTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("keyword", KeywordTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("letter", LetterTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("lowercase", LowerCaseTokenizerFactory.class);
|
||||
|
@ -356,7 +350,6 @@ public class AnalysisModule extends AbstractModule {
|
|||
analyzersBindings.processAnalyzer("default", StandardAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("standard", StandardAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("standard_html_strip", StandardHtmlStripAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("standardHtmlStrip", StandardHtmlStripAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("simple", SimpleAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("stop", StopAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("whitespace", WhitespaceAnalyzerProvider.class);
|
||||
|
@ -367,20 +360,14 @@ public class AnalysisModule extends AbstractModule {
|
|||
private static class ExtendedProcessor extends AnalysisBinderProcessor {
|
||||
@Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
|
||||
tokenFiltersBindings.processTokenFilter("snowball", SnowballTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("stemmer", StemmerTokenFilterFactory.class);
|
||||
|
||||
tokenFiltersBindings.processTokenFilter("arabicStem", ArabicStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("arabic_stem", ArabicStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("brazilianStem", BrazilianStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("brazilian_stem", BrazilianStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("czechStem", CzechStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("czech_stem", CzechStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("dutchStem", DutchStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("dutch_stem", DutchStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("frenchStem", FrenchStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("french_stem", FrenchStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("germanStem", GermanStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("german_stem", GermanStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("russianStem", RussianStemTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("russian_stem", RussianStemTokenFilterFactory.class);
|
||||
}
|
||||
|
||||
|
@ -392,16 +379,34 @@ public class AnalysisModule extends AbstractModule {
|
|||
analyzersBindings.processAnalyzer("snowball", SnowballAnalyzerProvider.class);
|
||||
|
||||
analyzersBindings.processAnalyzer("arabic", ArabicAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("armenian", ArmenianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("basque", BasqueAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("brazilian", BrazilianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("bulgarian", BulgarianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("catalan", CatalanAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("chinese", ChineseAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("cjk", CjkAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("czech", CzechAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("danish", DanishAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("dutch", DutchAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("english", EnglishAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("finnish", FinnishAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("french", FrenchAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("galician", GalicianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("german", GermanAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("greek", GreekAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("hindi", HindiAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("hungarian", HungarianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("indonesian", IndonesianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("italian", ItalianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("norwegian", NorwegianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("persian", PersianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("portuguese", PortugueseAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("romanian", RomanianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("russian", RussianAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("spanish", SpanishAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("swedish", SwedishAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("turkish", TurkishAnalyzerProvider.class);
|
||||
analyzersBindings.processAnalyzer("thai", ThaiAnalyzerProvider.class);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,6 +89,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
|
||||
NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get());
|
||||
analyzers.put(analyzerFactory.name(), analyzer);
|
||||
analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer);
|
||||
String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
|
||||
if (strAliases != null) {
|
||||
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
|
||||
|
@ -116,6 +117,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
|
||||
TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings);
|
||||
tokenizers.put(tokenizerName, tokenizerFactory);
|
||||
tokenizers.put(Strings.toCamelCase(tokenizerName), tokenizerFactory);
|
||||
}
|
||||
}
|
||||
this.tokenizers = ImmutableMap.copyOf(tokenizers);
|
||||
|
@ -134,6 +136,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
|
||||
CharFilterFactory tokenFilterFactory = charFilterFactoryFactory.create(charFilterName, charFilterSettings);
|
||||
charFilters.put(charFilterName, tokenFilterFactory);
|
||||
charFilters.put(Strings.toCamelCase(charFilterName), tokenFilterFactory);
|
||||
}
|
||||
}
|
||||
this.charFilters = ImmutableMap.copyOf(charFilters);
|
||||
|
@ -152,6 +155,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
|
||||
TokenFilterFactory tokenFilterFactory = tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings);
|
||||
tokenFilters.put(tokenFilterName, tokenFilterFactory);
|
||||
tokenFilters.put(Strings.toCamelCase(tokenFilterName), tokenFilterFactory);
|
||||
}
|
||||
}
|
||||
this.tokenFilters = ImmutableMap.copyOf(tokenFilters);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
|
@ -26,8 +27,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
@ -37,8 +36,9 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
|
|||
|
||||
@Inject public ArabicAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet());
|
||||
arabicAnalyzer = new ArabicAnalyzer(version, stopWords);
|
||||
arabicAnalyzer = new ArabicAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public ArabicAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArmenianAnalyzer> {
|
||||
|
||||
private final ArmenianAnalyzer analyzer;
|
||||
|
||||
@Inject public ArmenianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new ArmenianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public ArmenianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<BasqueAnalyzer> {
|
||||
|
||||
private final BasqueAnalyzer analyzer;
|
||||
|
||||
@Inject public BasqueAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new BasqueAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public BasqueAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -19,36 +19,26 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BrazilianAnalyzer> {
|
||||
|
||||
private final Set<?> stemExclusion;
|
||||
|
||||
private final BrazilianAnalyzer analyzer;
|
||||
|
||||
@Inject public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet());
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new BrazilianAnalyzer(version, stopWords, this.stemExclusion);
|
||||
analyzer = new BrazilianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public BrazilianAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BulgarianAnalyzer> {
|
||||
|
||||
private final BulgarianAnalyzer analyzer;
|
||||
|
||||
@Inject public BulgarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new BulgarianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, BulgarianAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public BulgarianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<CatalanAnalyzer> {
|
||||
|
||||
private final CatalanAnalyzer analyzer;
|
||||
|
||||
@Inject public CatalanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new CatalanAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public CatalanAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
|
@ -26,8 +27,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
@ -37,9 +36,9 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
|
|||
|
||||
@Inject public CzechAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new CzechAnalyzer(version, stopWords);
|
||||
analyzer = new CzechAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public CzechAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<DanishAnalyzer> {
|
||||
|
||||
private final DanishAnalyzer analyzer;
|
||||
|
||||
@Inject public DanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new DanishAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public DanishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -19,37 +19,26 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAnalyzer> {
|
||||
|
||||
private final Set<?> stemExclusion;
|
||||
|
||||
private final DutchAnalyzer analyzer;
|
||||
|
||||
@Inject public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet());
|
||||
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new DutchAnalyzer(version, stopWords, this.stemExclusion);
|
||||
analyzer = new DutchAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public DutchAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<EnglishAnalyzer> {
|
||||
|
||||
private final EnglishAnalyzer analyzer;
|
||||
|
||||
@Inject public EnglishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new EnglishAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public EnglishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<FinnishAnalyzer> {
|
||||
|
||||
private final FinnishAnalyzer analyzer;
|
||||
|
||||
@Inject public FinnishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new FinnishAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public FinnishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -19,37 +19,26 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<FrenchAnalyzer> {
|
||||
|
||||
private final Set<?> stemExclusion;
|
||||
|
||||
private final FrenchAnalyzer analyzer;
|
||||
|
||||
@Inject public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet());
|
||||
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new FrenchAnalyzer(version, stopWords, this.stemExclusion);
|
||||
analyzer = new FrenchAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public FrenchAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<GalicianAnalyzer> {
|
||||
|
||||
private final GalicianAnalyzer analyzer;
|
||||
|
||||
@Inject public GalicianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new GalicianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public GalicianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -19,37 +19,26 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<GermanAnalyzer> {
|
||||
|
||||
private final Set<?> stemExclusion;
|
||||
|
||||
private final GermanAnalyzer analyzer;
|
||||
|
||||
@Inject public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet());
|
||||
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new GermanAnalyzer(version, stopWords, this.stemExclusion);
|
||||
analyzer = new GermanAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public GermanAnalyzer get() {
|
||||
|
|
|
@ -26,8 +26,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
@ -37,9 +35,8 @@ public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider<GreekAn
|
|||
|
||||
@Inject public GreekAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, GreekAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new GreekAnalyzer(version, stopWords);
|
||||
analyzer = new GreekAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, GreekAnalyzer.getDefaultStopSet()));
|
||||
}
|
||||
|
||||
@Override public GreekAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAnalyzer> {
|
||||
|
||||
private final HindiAnalyzer analyzer;
|
||||
|
||||
@Inject public HindiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new HindiAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public HindiAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<HungarianAnalyzer> {
|
||||
|
||||
private final HungarianAnalyzer analyzer;
|
||||
|
||||
@Inject public HungarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new HungarianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public HungarianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider<IndonesianAnalyzer> {
|
||||
|
||||
private final IndonesianAnalyzer analyzer;
|
||||
|
||||
@Inject public IndonesianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new IndonesianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public IndonesianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ItalianAnalyzer> {
|
||||
|
||||
private final ItalianAnalyzer analyzer;
|
||||
|
||||
@Inject public ItalianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new ItalianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public ItalianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider<NorwegianAnalyzer> {
|
||||
|
||||
private final NorwegianAnalyzer analyzer;
|
||||
|
||||
@Inject public NorwegianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new NorwegianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public NorwegianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -26,8 +26,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
@ -37,9 +35,8 @@ public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Persi
|
|||
|
||||
@Inject public PersianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new PersianAnalyzer(version, stopWords);
|
||||
analyzer = new PersianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet()));
|
||||
}
|
||||
|
||||
@Override public PersianAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider<PortugueseAnalyzer> {
|
||||
|
||||
private final PortugueseAnalyzer analyzer;
|
||||
|
||||
@Inject public PortugueseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new PortugueseAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public PortugueseAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<RomanianAnalyzer> {
|
||||
|
||||
private final RomanianAnalyzer analyzer;
|
||||
|
||||
@Inject public RomanianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new RomanianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public RomanianAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -19,16 +19,14 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
@ -38,16 +36,9 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
|
|||
|
||||
@Inject public RussianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
if (Analysis.isNoStopwords(settings)) {
|
||||
analyzer = new RussianAnalyzer(version, ImmutableSet.of());
|
||||
} else {
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, ImmutableSet.of());
|
||||
if (!stopWords.isEmpty()) {
|
||||
analyzer = new RussianAnalyzer(version, stopWords);
|
||||
} else {
|
||||
analyzer = new RussianAnalyzer(version);
|
||||
}
|
||||
}
|
||||
analyzer = new RussianAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, RussianAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public RussianAnalyzer get() {
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -39,7 +40,7 @@ public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
|
||||
@Inject public SnowballTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
this.language = settings.get("language", settings.get("name", "English"));
|
||||
this.language = Strings.capitalize(settings.get("language", settings.get("name", "English")));
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<SpanishAnalyzer> {
|
||||
|
||||
private final SpanishAnalyzer analyzer;
|
||||
|
||||
@Inject public SpanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new SpanishAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public SpanishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.PorterStemFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanLightStemFilter;
|
||||
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
||||
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
||||
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
|
||||
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
|
||||
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.hi.HindiStemFilter;
|
||||
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
|
||||
import org.apache.lucene.analysis.id.IndonesianStemFilter;
|
||||
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
|
||||
import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
|
||||
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
|
||||
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.tartarus.snowball.ext.*;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private String language;
|
||||
|
||||
@Inject public StemmerTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
if ("armenian".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new ArmenianStemmer());
|
||||
} else if ("basque".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new BasqueStemmer());
|
||||
} else if ("catalan".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new CatalanStemmer());
|
||||
} else if ("danish".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new DanishStemmer());
|
||||
} else if ("dutch".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new DutchStemmer());
|
||||
} else if ("english".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new EnglishStemmer());
|
||||
} else if ("finnish".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new FinnishStemmer());
|
||||
} else if ("french".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new FrenchStemmer());
|
||||
} else if ("german".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new GermanStemmer());
|
||||
} else if ("german2".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new German2Stemmer());
|
||||
} else if ("hungarian".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new HungarianStemmer());
|
||||
} else if ("italian".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new ItalianStemmer());
|
||||
} else if ("kp".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new KpStemmer());
|
||||
} else if ("lovins".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new LovinsStemmer());
|
||||
} else if ("norwegian".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new NorwegianStemmer());
|
||||
} else if ("porter".equalsIgnoreCase(language)) {
|
||||
return new PorterStemFilter(tokenStream);
|
||||
} else if ("porter2".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new PorterStemmer());
|
||||
} else if ("portuguese".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new PortugueseStemmer());
|
||||
} else if ("romanian".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new RomanianStemmer());
|
||||
} else if ("russian".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new RussianStemmer());
|
||||
} else if ("spanish".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new SpanishStemmer());
|
||||
} else if ("swedish".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new SwedishStemmer());
|
||||
} else if ("turkish".equalsIgnoreCase(language)) {
|
||||
return new SnowballFilter(tokenStream, new TurkishStemmer());
|
||||
} else if ("minimal_english".equalsIgnoreCase(language) || "minimalEnglish".equalsIgnoreCase(language)) {
|
||||
return new EnglishMinimalStemFilter(tokenStream);
|
||||
} else if ("possessive_english".equalsIgnoreCase(language) || "possessiveEnglish".equalsIgnoreCase(language)) {
|
||||
return new EnglishPossessiveFilter(tokenStream);
|
||||
} else if ("light_finish".equalsIgnoreCase(language) || "lightFinish".equalsIgnoreCase(language)) {
|
||||
return new FinnishLightStemFilter(tokenStream);
|
||||
} else if ("light_french".equalsIgnoreCase(language) || "lightFrench".equalsIgnoreCase(language)) {
|
||||
return new FrenchLightStemFilter(tokenStream);
|
||||
} else if ("minimal_french".equalsIgnoreCase(language) || "minimalFrench".equalsIgnoreCase(language)) {
|
||||
return new FrenchMinimalStemFilter(tokenStream);
|
||||
} else if ("light_german".equalsIgnoreCase(language) || "lightGerman".equalsIgnoreCase(language)) {
|
||||
return new GermanLightStemFilter(tokenStream);
|
||||
} else if ("minimal_german".equalsIgnoreCase(language) || "minimalGerman".equalsIgnoreCase(language)) {
|
||||
return new GermanMinimalStemFilter(tokenStream);
|
||||
} else if ("hindi".equalsIgnoreCase(language)) {
|
||||
return new HindiStemFilter(tokenStream);
|
||||
} else if ("light_hungarian".equalsIgnoreCase(language) || "lightHungarian".equalsIgnoreCase(language)) {
|
||||
return new HungarianLightStemFilter(tokenStream);
|
||||
} else if ("indonesian".equalsIgnoreCase(language)) {
|
||||
return new IndonesianStemFilter(tokenStream);
|
||||
} else if ("light_italian".equalsIgnoreCase(language) || "lightItalian".equalsIgnoreCase(language)) {
|
||||
return new ItalianLightStemFilter(tokenStream);
|
||||
} else if ("light_portuguese".equalsIgnoreCase(language) || "lightPortuguese".equalsIgnoreCase(language)) {
|
||||
return new PortugueseLightStemFilter(tokenStream);
|
||||
} else if ("minimal_portuguese".equalsIgnoreCase(language) || "minimalPortuguese".equalsIgnoreCase(language)) {
|
||||
return new PortugueseMinimalStemFilter(tokenStream);
|
||||
} else if ("portuguese".equalsIgnoreCase(language)) {
|
||||
return new PortugueseStemFilter(tokenStream);
|
||||
} else if ("light_russian".equalsIgnoreCase(language) || "lightRussian".equalsIgnoreCase(language)) {
|
||||
return new RussianLightStemFilter(tokenStream);
|
||||
} else if ("light_spanish".equalsIgnoreCase(language) || "lightSpanish".equalsIgnoreCase(language)) {
|
||||
return new SpanishLightStemFilter(tokenStream);
|
||||
} else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) {
|
||||
return new SpanishLightStemFilter(tokenStream);
|
||||
}
|
||||
return new SnowballFilter(tokenStream, language);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<SwedishAnalyzer> {
|
||||
|
||||
private final SwedishAnalyzer analyzer;
|
||||
|
||||
@Inject public SwedishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new SwedishAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public SwedishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<TurkishAnalyzer> {
|
||||
|
||||
private final TurkishAnalyzer analyzer;
|
||||
|
||||
@Inject public TurkishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new TurkishAnalyzer(version,
|
||||
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
|
||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
||||
@Override public TurkishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -22,17 +22,30 @@ package org.elasticsearch.indices.analysis;
|
|||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
||||
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
|
||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
import org.apache.lucene.analysis.cz.CzechStemFilter;
|
||||
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||
import org.apache.lucene.analysis.de.GermanStemFilter;
|
||||
import org.apache.lucene.analysis.el.GreekAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
||||
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
||||
import org.apache.lucene.analysis.fa.PersianAnalyzer;
|
||||
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
||||
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
||||
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
||||
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
||||
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
||||
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
|
||||
|
@ -40,8 +53,11 @@ import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
|||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||
import org.apache.lucene.analysis.nl.DutchStemFilter;
|
||||
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
||||
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
|
||||
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||
import org.apache.lucene.analysis.ru.RussianStemFilter;
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
|
@ -51,7 +67,9 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
|
||||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
@ -98,18 +116,36 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
|
||||
analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
|
||||
analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("standardHtmlStrip", new PreBuiltAnalyzerProviderFactory("standardHtmlStrip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
|
||||
analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("basque", new PreBuiltAnalyzerProviderFactory("basque", AnalyzerScope.INDICES, new BasqueAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("bulgarian", new PreBuiltAnalyzerProviderFactory("bulgarian", AnalyzerScope.INDICES, new BulgarianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("catalan", new PreBuiltAnalyzerProviderFactory("catalan", AnalyzerScope.INDICES, new CatalanAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer()));
|
||||
// "cjk" is registered with the CJK analyzer rather than the Chinese one. Fully qualified
// because no CJKAnalyzer import is visible in this file's import list.
analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new org.apache.lucene.analysis.cjk.CJKAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("danish", new PreBuiltAnalyzerProviderFactory("danish", AnalyzerScope.INDICES, new DanishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("english", new PreBuiltAnalyzerProviderFactory("english", AnalyzerScope.INDICES, new EnglishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("finnish", new PreBuiltAnalyzerProviderFactory("finnish", AnalyzerScope.INDICES, new FinnishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("galician", new PreBuiltAnalyzerProviderFactory("galician", AnalyzerScope.INDICES, new GalicianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("german", new PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("portuguese", new PreBuiltAnalyzerProviderFactory("portuguese", AnalyzerScope.INDICES, new PortugueseAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("romanian", new PreBuiltAnalyzerProviderFactory("romanian", AnalyzerScope.INDICES, new RomanianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("spanish", new PreBuiltAnalyzerProviderFactory("spanish", AnalyzerScope.INDICES, new SpanishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("swedish", new PreBuiltAnalyzerProviderFactory("swedish", AnalyzerScope.INDICES, new SwedishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("turkish", new PreBuiltAnalyzerProviderFactory("turkish", AnalyzerScope.INDICES, new TurkishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
|
||||
// Base Tokenizers
|
||||
|
@ -133,16 +169,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
}
|
||||
}));
|
||||
|
||||
tokenizerFactories.put("uaxUrlEmail", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
|
||||
@Override public String name() {
|
||||
return "uaxUrlEmail";
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
return new UAX29URLEmailTokenizer(reader);
|
||||
}
|
||||
}));
|
||||
|
||||
tokenizerFactories.put("path_hierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
|
||||
@Override public String name() {
|
||||
return "path_hierarchy";
|
||||
|
@ -153,16 +179,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
}
|
||||
}));
|
||||
|
||||
tokenizerFactories.put("pathHierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
|
||||
@Override public String name() {
|
||||
return "pathHierarchy";
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
return new PathHierarchyTokenizer(reader);
|
||||
}
|
||||
}));
|
||||
|
||||
tokenizerFactories.put("keyword", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
|
||||
@Override public String name() {
|
||||
return "keyword";
|
||||
|
@ -294,16 +310,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
}
|
||||
}));
|
||||
|
||||
tokenFilterFactories.put("porterStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "porterStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new PorterStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
|
||||
tokenFilterFactories.put("porter_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "porter_stem";
|
||||
|
@ -384,13 +390,13 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new SnowballFilter(tokenStream, "English");
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("arabicStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
tokenFilterFactories.put("stemmer", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "arabicStem";
|
||||
return "stemmer";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new ArabicStemFilter(tokenStream);
|
||||
return new PorterStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
|
@ -402,15 +408,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new ArabicStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("brazilianStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "brazilianStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new BrazilianStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("brazilian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "brazilian_stem";
|
||||
|
@ -420,15 +417,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new BrazilianStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("czechStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "czechStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new CzechStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("czech_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "czech_stem";
|
||||
|
@ -438,15 +426,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new CzechStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("dutchStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "dutchStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new DutchStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("dutch_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "dutch_stem";
|
||||
|
@ -456,15 +435,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new DutchStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("frenchStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "frenchStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new FrenchStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("french_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "french_stem";
|
||||
|
@ -474,15 +444,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new FrenchStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("germanStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "germanStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new GermanStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("german_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "german_stem";
|
||||
|
@ -492,15 +453,6 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
return new GermanStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("russianStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "russianStem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new RussianStemFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("russian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "russian_stem";
|
||||
|
|
Loading…
Reference in New Issue