Analysis: Expose light and minimal language token filters, closes #908.

This commit is contained in:
kimchy 2011-05-05 23:45:59 +03:00
parent 0f78100b97
commit 85eae8b3f5
33 changed files with 1125 additions and 196 deletions

View File

@ -42,6 +42,23 @@ public class Analysis {
return value != null && "_none_".equals(value);
}
public static Set<?> parseStemExclusion(Settings settings, Set<?> defaultStemExclusion) {
String value = settings.get("stem_exclusion");
if (value != null) {
if ("_none_".equals(value)) {
return ImmutableSet.of();
} else {
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
}
}
String[] stopWords = settings.getAsArray("stem_exclusion", null);
if (stopWords != null) {
return ImmutableSet.copyOf(Iterators.forArray(stopWords));
} else {
return defaultStemExclusion;
}
}
public static Set<?> parseStopWords(Settings settings, Set<?> defaultStopWords) {
String value = settings.get("stopwords");
if (value != null) {
@ -63,14 +80,15 @@ public class Analysis {
* Fetches a list of words from the specified settings file. The list should either be available at the key
* specified by settingsPrefix or in a file specified by settingsPrefix + _path.
*
* @throws ElasticSearchIllegalArgumentException If the word list cannot be found at either key.
* @throws ElasticSearchIllegalArgumentException
* If the word list cannot be found at either key.
*/
public static Set<String> getWordList(Settings settings, String settingPrefix) {
String wordListPath = settings.get(settingPrefix + "_path", null);
if (wordListPath == null) {
String[] explicitWordList = settings.getAsArray(settingPrefix, null);
if(explicitWordList == null) {
if (explicitWordList == null) {
String message = String.format("%s or %s_path must be provided.", settingPrefix, settingPrefix);
throw new ElasticSearchIllegalArgumentException(message);
} else {

View File

@ -311,7 +311,6 @@ public class AnalysisModule extends AbstractModule {
@Override public void processCharFilters(CharFiltersBindings charFiltersBindings) {
charFiltersBindings.processCharFilter("html_strip", HtmlStripCharFilterFactory.class);
charFiltersBindings.processCharFilter("htmlStrip", HtmlStripCharFilterFactory.class);
}
@Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
@ -320,7 +319,6 @@ public class AnalysisModule extends AbstractModule {
tokenFiltersBindings.processTokenFilter("asciifolding", ASCIIFoldingTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("length", LengthTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("lowercase", LowerCaseTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("porterStem", PorterStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("porter_stem", PorterStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("standard", StandardTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("nGram", NGramTokenFilterFactory.class);
@ -329,18 +327,14 @@ public class AnalysisModule extends AbstractModule {
tokenFiltersBindings.processTokenFilter("edge_ngram", EdgeNGramTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("shingle", ShingleTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("phonetic", PhoneticTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("dictionaryDecompounder", DictionaryCompoundWordTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("dictionary_decompounder", DictionaryCompoundWordTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("hyphenationDecompounder", HyphenationCompoundWordTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("hypennation_decompounder", HyphenationCompoundWordTokenFilterFactory.class);
}
@Override public void processTokenizers(TokenizersBindings tokenizersBindings) {
tokenizersBindings.processTokenizer("standard", StandardTokenizerFactory.class);
tokenizersBindings.processTokenizer("uax_url_email", UAX29URLEmailTokenizerFactory.class);
tokenizersBindings.processTokenizer("uaxUrlEmail", UAX29URLEmailTokenizerFactory.class);
tokenizersBindings.processTokenizer("path_hierarchy", PathHierarchyTokenizerFactory.class);
tokenizersBindings.processTokenizer("pathHierarchy", PathHierarchyTokenizerFactory.class);
tokenizersBindings.processTokenizer("keyword", KeywordTokenizerFactory.class);
tokenizersBindings.processTokenizer("letter", LetterTokenizerFactory.class);
tokenizersBindings.processTokenizer("lowercase", LowerCaseTokenizerFactory.class);
@ -356,7 +350,6 @@ public class AnalysisModule extends AbstractModule {
analyzersBindings.processAnalyzer("default", StandardAnalyzerProvider.class);
analyzersBindings.processAnalyzer("standard", StandardAnalyzerProvider.class);
analyzersBindings.processAnalyzer("standard_html_strip", StandardHtmlStripAnalyzerProvider.class);
analyzersBindings.processAnalyzer("standardHtmlStrip", StandardHtmlStripAnalyzerProvider.class);
analyzersBindings.processAnalyzer("simple", SimpleAnalyzerProvider.class);
analyzersBindings.processAnalyzer("stop", StopAnalyzerProvider.class);
analyzersBindings.processAnalyzer("whitespace", WhitespaceAnalyzerProvider.class);
@ -367,20 +360,14 @@ public class AnalysisModule extends AbstractModule {
private static class ExtendedProcessor extends AnalysisBinderProcessor {
@Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
tokenFiltersBindings.processTokenFilter("snowball", SnowballTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("stemmer", StemmerTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("arabicStem", ArabicStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("arabic_stem", ArabicStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("brazilianStem", BrazilianStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("brazilian_stem", BrazilianStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("czechStem", CzechStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("czech_stem", CzechStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("dutchStem", DutchStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("dutch_stem", DutchStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("frenchStem", FrenchStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("french_stem", FrenchStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("germanStem", GermanStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("german_stem", GermanStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("russianStem", RussianStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("russian_stem", RussianStemTokenFilterFactory.class);
}
@ -392,16 +379,34 @@ public class AnalysisModule extends AbstractModule {
analyzersBindings.processAnalyzer("snowball", SnowballAnalyzerProvider.class);
analyzersBindings.processAnalyzer("arabic", ArabicAnalyzerProvider.class);
analyzersBindings.processAnalyzer("armenian", ArmenianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("basque", BasqueAnalyzerProvider.class);
analyzersBindings.processAnalyzer("brazilian", BrazilianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("bulgarian", BulgarianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("catalan", CatalanAnalyzerProvider.class);
analyzersBindings.processAnalyzer("chinese", ChineseAnalyzerProvider.class);
analyzersBindings.processAnalyzer("cjk", CjkAnalyzerProvider.class);
analyzersBindings.processAnalyzer("czech", CzechAnalyzerProvider.class);
analyzersBindings.processAnalyzer("danish", DanishAnalyzerProvider.class);
analyzersBindings.processAnalyzer("dutch", DutchAnalyzerProvider.class);
analyzersBindings.processAnalyzer("english", EnglishAnalyzerProvider.class);
analyzersBindings.processAnalyzer("finnish", FinnishAnalyzerProvider.class);
analyzersBindings.processAnalyzer("french", FrenchAnalyzerProvider.class);
analyzersBindings.processAnalyzer("galician", GalicianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("german", GermanAnalyzerProvider.class);
analyzersBindings.processAnalyzer("greek", GreekAnalyzerProvider.class);
analyzersBindings.processAnalyzer("hindi", HindiAnalyzerProvider.class);
analyzersBindings.processAnalyzer("hungarian", HungarianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("indonesian", IndonesianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("italian", ItalianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("norwegian", NorwegianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("persian", PersianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("portuguese", PortugueseAnalyzerProvider.class);
analyzersBindings.processAnalyzer("romanian", RomanianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("russian", RussianAnalyzerProvider.class);
analyzersBindings.processAnalyzer("spanish", SpanishAnalyzerProvider.class);
analyzersBindings.processAnalyzer("swedish", SwedishAnalyzerProvider.class);
analyzersBindings.processAnalyzer("turkish", TurkishAnalyzerProvider.class);
analyzersBindings.processAnalyzer("thai", ThaiAnalyzerProvider.class);
}
}

View File

@ -89,6 +89,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get());
analyzers.put(analyzerFactory.name(), analyzer);
analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer);
String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
if (strAliases != null) {
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
@ -116,6 +117,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings);
tokenizers.put(tokenizerName, tokenizerFactory);
tokenizers.put(Strings.toCamelCase(tokenizerName), tokenizerFactory);
}
}
this.tokenizers = ImmutableMap.copyOf(tokenizers);
@ -134,6 +136,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
CharFilterFactory tokenFilterFactory = charFilterFactoryFactory.create(charFilterName, charFilterSettings);
charFilters.put(charFilterName, tokenFilterFactory);
charFilters.put(Strings.toCamelCase(charFilterName), tokenFilterFactory);
}
}
this.charFilters = ImmutableMap.copyOf(charFilters);
@ -152,6 +155,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
TokenFilterFactory tokenFilterFactory = tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings);
tokenFilters.put(tokenFilterName, tokenFilterFactory);
tokenFilters.put(Strings.toCamelCase(tokenFilterName), tokenFilterFactory);
}
}
this.tokenFilters = ImmutableMap.copyOf(tokenFilters);

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -26,8 +27,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
@ -37,8 +36,9 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
@Inject public ArabicAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet());
arabicAnalyzer = new ArabicAnalyzer(version, stopWords);
arabicAnalyzer = new ArabicAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public ArabicAnalyzer get() {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArmenianAnalyzer> {
private final ArmenianAnalyzer analyzer;
@Inject public ArmenianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new ArmenianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public ArmenianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<BasqueAnalyzer> {
private final BasqueAnalyzer analyzer;
@Inject public BasqueAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new BasqueAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public BasqueAnalyzer get() {
return this.analyzer;
}
}

View File

@ -19,36 +19,26 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BrazilianAnalyzer> {
private final Set<?> stemExclusion;
private final BrazilianAnalyzer analyzer;
@Inject public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet());
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.stemExclusion = ImmutableSet.of();
}
analyzer = new BrazilianAnalyzer(version, stopWords, this.stemExclusion);
analyzer = new BrazilianAnalyzer(version,
Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public BrazilianAnalyzer get() {

View File

@ -0,0 +1,47 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BulgarianAnalyzer> {
private final BulgarianAnalyzer analyzer;
@Inject public BulgarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new BulgarianAnalyzer(version,
Analysis.parseStopWords(settings, BulgarianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public BulgarianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<CatalanAnalyzer> {
private final CatalanAnalyzer analyzer;
@Inject public CatalanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new CatalanAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public CatalanAnalyzer get() {
return this.analyzer;
}
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -26,8 +27,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
@ -37,9 +36,9 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
@Inject public CzechAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet());
analyzer = new CzechAnalyzer(version, stopWords);
analyzer = new CzechAnalyzer(version,
Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public CzechAnalyzer get() {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<DanishAnalyzer> {
private final DanishAnalyzer analyzer;
@Inject public DanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new DanishAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public DanishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -19,37 +19,26 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAnalyzer> {
private final Set<?> stemExclusion;
private final DutchAnalyzer analyzer;
@Inject public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet());
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.stemExclusion = ImmutableSet.of();
}
analyzer = new DutchAnalyzer(version, stopWords, this.stemExclusion);
analyzer = new DutchAnalyzer(version,
Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public DutchAnalyzer get() {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<EnglishAnalyzer> {
private final EnglishAnalyzer analyzer;
@Inject public EnglishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new EnglishAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public EnglishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<FinnishAnalyzer> {
private final FinnishAnalyzer analyzer;
@Inject public FinnishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new FinnishAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public FinnishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -19,37 +19,26 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<FrenchAnalyzer> {
private final Set<?> stemExclusion;
private final FrenchAnalyzer analyzer;
@Inject public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet());
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.stemExclusion = ImmutableSet.of();
}
analyzer = new FrenchAnalyzer(version, stopWords, this.stemExclusion);
analyzer = new FrenchAnalyzer(version,
Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public FrenchAnalyzer get() {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<GalicianAnalyzer> {
private final GalicianAnalyzer analyzer;
@Inject public GalicianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new GalicianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public GalicianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -19,37 +19,26 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<GermanAnalyzer> {
private final Set<?> stemExclusion;
private final GermanAnalyzer analyzer;
@Inject public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet());
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.stemExclusion = ImmutableSet.of();
}
analyzer = new GermanAnalyzer(version, stopWords, this.stemExclusion);
analyzer = new GermanAnalyzer(version,
Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public GermanAnalyzer get() {

View File

@ -26,8 +26,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
@ -37,9 +35,8 @@ public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider<GreekAn
@Inject public GreekAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, GreekAnalyzer.getDefaultStopSet());
analyzer = new GreekAnalyzer(version, stopWords);
analyzer = new GreekAnalyzer(version,
Analysis.parseStopWords(settings, GreekAnalyzer.getDefaultStopSet()));
}
@Override public GreekAnalyzer get() {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAnalyzer> {
private final HindiAnalyzer analyzer;
@Inject public HindiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new HindiAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public HindiAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<HungarianAnalyzer> {
private final HungarianAnalyzer analyzer;
@Inject public HungarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new HungarianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public HungarianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider<IndonesianAnalyzer> {
private final IndonesianAnalyzer analyzer;
@Inject public IndonesianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new IndonesianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public IndonesianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ItalianAnalyzer> {
private final ItalianAnalyzer analyzer;
@Inject public ItalianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new ItalianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public ItalianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider<NorwegianAnalyzer> {
private final NorwegianAnalyzer analyzer;
@Inject public NorwegianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new NorwegianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public NorwegianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -26,8 +26,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
@ -37,9 +35,8 @@ public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Persi
@Inject public PersianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet());
analyzer = new PersianAnalyzer(version, stopWords);
analyzer = new PersianAnalyzer(version,
Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet()));
}
@Override public PersianAnalyzer get() {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider<PortugueseAnalyzer> {
private final PortugueseAnalyzer analyzer;
@Inject public PortugueseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new PortugueseAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public PortugueseAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<RomanianAnalyzer> {
private final RomanianAnalyzer analyzer;
@Inject public RomanianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new RomanianAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public RomanianAnalyzer get() {
return this.analyzer;
}
}

View File

@ -19,16 +19,14 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (shay.banon)
*/
@ -38,16 +36,9 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
@Inject public RussianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
if (Analysis.isNoStopwords(settings)) {
analyzer = new RussianAnalyzer(version, ImmutableSet.of());
} else {
Set<?> stopWords = Analysis.parseStopWords(settings, ImmutableSet.of());
if (!stopWords.isEmpty()) {
analyzer = new RussianAnalyzer(version, stopWords);
} else {
analyzer = new RussianAnalyzer(version);
}
}
analyzer = new RussianAnalyzer(version,
Analysis.parseStopWords(settings, RussianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public RussianAnalyzer get() {

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -39,7 +40,7 @@ public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory {
@Inject public SnowballTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
this.language = settings.get("language", settings.get("name", "English"));
this.language = Strings.capitalize(settings.get("language", settings.get("name", "English")));
}
@Override public TokenStream create(TokenStream tokenStream) {

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<SpanishAnalyzer> {
private final SpanishAnalyzer analyzer;
@Inject public SpanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new SpanishAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public SpanishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,145 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanLightStemFilter;
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
import org.apache.lucene.analysis.hi.HindiStemFilter;
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
import org.apache.lucene.analysis.id.IndonesianStemFilter;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.tartarus.snowball.ext.*;
/**
*/
public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
private String language;
@Inject public StemmerTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
}
@Override public TokenStream create(TokenStream tokenStream) {
if ("armenian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new ArmenianStemmer());
} else if ("basque".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new BasqueStemmer());
} else if ("catalan".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new CatalanStemmer());
} else if ("danish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new DanishStemmer());
} else if ("dutch".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new DutchStemmer());
} else if ("english".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new EnglishStemmer());
} else if ("finnish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new FinnishStemmer());
} else if ("french".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new FrenchStemmer());
} else if ("german".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new GermanStemmer());
} else if ("german2".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new German2Stemmer());
} else if ("hungarian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new HungarianStemmer());
} else if ("italian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new ItalianStemmer());
} else if ("kp".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new KpStemmer());
} else if ("lovins".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new LovinsStemmer());
} else if ("norwegian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new NorwegianStemmer());
} else if ("porter".equalsIgnoreCase(language)) {
return new PorterStemFilter(tokenStream);
} else if ("porter2".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new PorterStemmer());
} else if ("portuguese".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new PortugueseStemmer());
} else if ("romanian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new RomanianStemmer());
} else if ("russian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new RussianStemmer());
} else if ("spanish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new SpanishStemmer());
} else if ("swedish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new SwedishStemmer());
} else if ("turkish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new TurkishStemmer());
} else if ("minimal_english".equalsIgnoreCase(language) || "minimalEnglish".equalsIgnoreCase(language)) {
return new EnglishMinimalStemFilter(tokenStream);
} else if ("possessive_english".equalsIgnoreCase(language) || "possessiveEnglish".equalsIgnoreCase(language)) {
return new EnglishPossessiveFilter(tokenStream);
} else if ("light_finish".equalsIgnoreCase(language) || "lightFinish".equalsIgnoreCase(language)) {
return new FinnishLightStemFilter(tokenStream);
} else if ("light_french".equalsIgnoreCase(language) || "lightFrench".equalsIgnoreCase(language)) {
return new FrenchLightStemFilter(tokenStream);
} else if ("minimal_french".equalsIgnoreCase(language) || "minimalFrench".equalsIgnoreCase(language)) {
return new FrenchMinimalStemFilter(tokenStream);
} else if ("light_german".equalsIgnoreCase(language) || "lightGerman".equalsIgnoreCase(language)) {
return new GermanLightStemFilter(tokenStream);
} else if ("minimal_german".equalsIgnoreCase(language) || "minimalGerman".equalsIgnoreCase(language)) {
return new GermanMinimalStemFilter(tokenStream);
} else if ("hindi".equalsIgnoreCase(language)) {
return new HindiStemFilter(tokenStream);
} else if ("light_hungarian".equalsIgnoreCase(language) || "lightHungarian".equalsIgnoreCase(language)) {
return new HungarianLightStemFilter(tokenStream);
} else if ("indonesian".equalsIgnoreCase(language)) {
return new IndonesianStemFilter(tokenStream);
} else if ("light_italian".equalsIgnoreCase(language) || "lightItalian".equalsIgnoreCase(language)) {
return new ItalianLightStemFilter(tokenStream);
} else if ("light_portuguese".equalsIgnoreCase(language) || "lightPortuguese".equalsIgnoreCase(language)) {
return new PortugueseLightStemFilter(tokenStream);
} else if ("minimal_portuguese".equalsIgnoreCase(language) || "minimalPortuguese".equalsIgnoreCase(language)) {
return new PortugueseMinimalStemFilter(tokenStream);
} else if ("portuguese".equalsIgnoreCase(language)) {
return new PortugueseStemFilter(tokenStream);
} else if ("light_russian".equalsIgnoreCase(language) || "lightRussian".equalsIgnoreCase(language)) {
return new RussianLightStemFilter(tokenStream);
} else if ("light_spanish".equalsIgnoreCase(language) || "lightSpanish".equalsIgnoreCase(language)) {
return new SpanishLightStemFilter(tokenStream);
} else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) {
return new SpanishLightStemFilter(tokenStream);
}
return new SnowballFilter(tokenStream, language);
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<SwedishAnalyzer> {
private final SwedishAnalyzer analyzer;
@Inject public SwedishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new SwedishAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public SwedishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
/**
* @author kimchy (shay.banon)
*/
public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<TurkishAnalyzer> {
private final TurkishAnalyzer analyzer;
@Inject public TurkishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new TurkishAnalyzer(version,
Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
}
@Override public TurkishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -22,17 +22,30 @@ package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
@ -40,8 +53,11 @@ import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.ru.RussianStemFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
@ -51,7 +67,9 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
@ -98,18 +116,36 @@ public class IndicesAnalysisService extends AbstractComponent {
analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("standardHtmlStrip", new PreBuiltAnalyzerProviderFactory("standardHtmlStrip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("basque", new PreBuiltAnalyzerProviderFactory("basque", AnalyzerScope.INDICES, new BasqueAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("bulgarian", new PreBuiltAnalyzerProviderFactory("bulgarian", AnalyzerScope.INDICES, new BulgarianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("catalan", new PreBuiltAnalyzerProviderFactory("catalan", AnalyzerScope.INDICES, new CatalanAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer()));
analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new ChineseAnalyzer()));
analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("danish", new PreBuiltAnalyzerProviderFactory("danish", AnalyzerScope.INDICES, new DanishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("english", new PreBuiltAnalyzerProviderFactory("english", AnalyzerScope.INDICES, new EnglishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("finnish", new PreBuiltAnalyzerProviderFactory("finnish", AnalyzerScope.INDICES, new FinnishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("galician", new PreBuiltAnalyzerProviderFactory("galician", AnalyzerScope.INDICES, new GalicianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("german", new PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("portuguese", new PreBuiltAnalyzerProviderFactory("portuguese", AnalyzerScope.INDICES, new PortugueseAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("romanian", new PreBuiltAnalyzerProviderFactory("romanian", AnalyzerScope.INDICES, new RomanianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("spanish", new PreBuiltAnalyzerProviderFactory("spanish", AnalyzerScope.INDICES, new SpanishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("swedish", new PreBuiltAnalyzerProviderFactory("swedish", AnalyzerScope.INDICES, new SwedishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("turkish", new PreBuiltAnalyzerProviderFactory("turkish", AnalyzerScope.INDICES, new TurkishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION)));
// Base Tokenizers
@ -133,16 +169,6 @@ public class IndicesAnalysisService extends AbstractComponent {
}
}));
tokenizerFactories.put("uaxUrlEmail", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override public String name() {
return "uaxUrlEmail";
}
@Override public Tokenizer create(Reader reader) {
return new UAX29URLEmailTokenizer(reader);
}
}));
tokenizerFactories.put("path_hierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override public String name() {
return "path_hierarchy";
@ -153,16 +179,6 @@ public class IndicesAnalysisService extends AbstractComponent {
}
}));
tokenizerFactories.put("pathHierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override public String name() {
return "pathHierarchy";
}
@Override public Tokenizer create(Reader reader) {
return new PathHierarchyTokenizer(reader);
}
}));
tokenizerFactories.put("keyword", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override public String name() {
return "keyword";
@ -294,16 +310,6 @@ public class IndicesAnalysisService extends AbstractComponent {
}
}));
tokenFilterFactories.put("porterStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "porterStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new PorterStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("porter_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "porter_stem";
@ -384,13 +390,13 @@ public class IndicesAnalysisService extends AbstractComponent {
return new SnowballFilter(tokenStream, "English");
}
}));
tokenFilterFactories.put("arabicStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
tokenFilterFactories.put("stemmer", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "arabicStem";
return "stemmer";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new ArabicStemFilter(tokenStream);
return new PorterStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@ -402,15 +408,6 @@ public class IndicesAnalysisService extends AbstractComponent {
return new ArabicStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("brazilianStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "brazilianStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new BrazilianStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("brazilian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "brazilian_stem";
@ -420,15 +417,6 @@ public class IndicesAnalysisService extends AbstractComponent {
return new BrazilianStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("czechStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "czechStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new CzechStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("czech_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "czech_stem";
@ -438,15 +426,6 @@ public class IndicesAnalysisService extends AbstractComponent {
return new CzechStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("dutchStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "dutchStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new DutchStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("dutch_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "dutch_stem";
@ -456,15 +435,6 @@ public class IndicesAnalysisService extends AbstractComponent {
return new DutchStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("frenchStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "frenchStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new FrenchStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("french_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "french_stem";
@ -474,15 +444,6 @@ public class IndicesAnalysisService extends AbstractComponent {
return new FrenchStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("germanStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "germanStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new GermanStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("german_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "german_stem";
@ -492,15 +453,6 @@ public class IndicesAnalysisService extends AbstractComponent {
return new GermanStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("russianStem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "russianStem";
}
@Override public TokenStream create(TokenStream tokenStream) {
return new RussianStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("russian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "russian_stem";