Refactoring IndicesAnalysisService
Using enums where possible in order to clean up the code in IndicesAnalysisService. Also introduced a simpler generic caching mechanism, and tests.
This commit is contained in:
parent adb83182a5
commit 8257370415
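
The change in a nutshell: every pre-built tokenizer, token filter, and char filter becomes an enum constant that knows how to build itself for a given version, and the per-version instances are memoized through a small cache factory. A sketch of the resulting lookup path, using only names that appear in the diff below (the 0.90 settings value is purely illustrative):

    // Resolve a pre-built tokenizer for the version an index was created with.
    Settings indexSettings = ImmutableSettings.settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0)
            .build();
    Version indexVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
    // Enum lookup plus per-version cache: repeated calls for the same lucene version reuse one factory.
    TokenizerFactory standard = PreBuiltTokenizers.STANDARD.getTokenizerFactory(indexVersion);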
@@ -19,7 +19,12 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;

import java.util.Locale;

public class PreBuiltCharFilterFactoryFactory implements CharFilterFactoryFactory {

@@ -31,6 +36,11 @@ public class PreBuiltCharFilterFactoryFactory implements CharFilterFactoryFactor
    @Override
    public CharFilterFactory create(String name, Settings settings) {
        Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
        if (!Version.CURRENT.equals(indexVersion)) {
            return PreBuiltCharFilters.valueOf(name.toUpperCase(Locale.ROOT)).getCharFilterFactory(indexVersion);
        }

        return charFilterFactory;
    }
}
@@ -19,7 +19,12 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;

import java.util.Locale;

public class PreBuiltTokenFilterFactoryFactory implements TokenFilterFactoryFactory {

@@ -31,6 +36,10 @@ public class PreBuiltTokenFilterFactoryFactory implements TokenFilterFactoryFact
    @Override
    public TokenFilterFactory create(String name, Settings settings) {
        Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
        if (!Version.CURRENT.equals(indexVersion)) {
            return PreBuiltTokenFilters.valueOf(name.toUpperCase(Locale.ROOT)).getTokenFilterFactory(indexVersion);
        }
        return tokenFilterFactory;
    }
}
@@ -19,7 +19,12 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;

import java.util.Locale;

public class PreBuiltTokenizerFactoryFactory implements TokenizerFactoryFactory {

@@ -31,6 +36,12 @@ public class PreBuiltTokenizerFactoryFactory implements TokenizerFactoryFactory
    @Override
    public TokenizerFactory create(String name, Settings settings) {
        Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
        if (!Version.CURRENT.equals(indexVersion)) {
            TokenizerFactory versionedTokenizerFactory = PreBuiltTokenizers.valueOf(name.toUpperCase(Locale.ROOT)).getTokenizerFactory(indexVersion);
            return versionedTokenizerFactory;
        }

        return tokenizerFactory;
    }
}
@@ -82,574 +82,40 @@ public class IndicesAnalysisService extends AbstractComponent {
    public IndicesAnalysisService(Settings settings) {
        super(settings);

        // Analyzers
        for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
            String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
            analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
        }

        // Base Tokenizers
        tokenizerFactories.put("standard", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "standard";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new StandardTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("classic", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "classic";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new ClassicTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("uax_url_email", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "uax_url_email";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new UAX29URLEmailTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("path_hierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "path_hierarchy";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new PathHierarchyTokenizer(reader);
            }
        }));

        tokenizerFactories.put("keyword", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "keyword";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new KeywordTokenizer(reader);
            }
        }));

        tokenizerFactories.put("letter", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "letter";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new LetterTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("lowercase", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "lowercase";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new LowerCaseTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("whitespace", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "whitespace";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("nGram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "nGram";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("ngram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "ngram";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("edgeNGram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "edgeNGram";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
            }
        }));

        tokenizerFactories.put("edge_ngram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "edge_ngram";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
            }
        }));

        tokenizerFactories.put("pattern", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override
            public String name() {
                return "pattern";
            }

            @Override
            public Tokenizer create(Reader reader) {
                return new PatternTokenizer(reader, Regex.compile("\\W+", null), -1);
            }
        }));

        // Token Filters
        tokenFilterFactories.put("word_delimiter", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "word_delimiter";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new WordDelimiterFilter(tokenStream,
                        WordDelimiterFilter.GENERATE_WORD_PARTS |
                        WordDelimiterFilter.GENERATE_NUMBER_PARTS |
                        WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
                        WordDelimiterFilter.SPLIT_ON_NUMERICS |
                        WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
            }
        }));
        tokenFilterFactories.put("stop", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "stop";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new StopFilter(Lucene.ANALYZER_VERSION, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            }
        }));

        tokenFilterFactories.put("trim", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "trim";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new TrimFilter(Lucene.ANALYZER_VERSION, tokenStream);
            }
        }));

        tokenFilterFactories.put("reverse", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "reverse";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new ReverseStringFilter(Lucene.ANALYZER_VERSION, tokenStream);
            }
        }));

        tokenFilterFactories.put("asciifolding", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "asciifolding";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new ASCIIFoldingFilter(tokenStream);
            }
        }));

        tokenFilterFactories.put("length", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "length";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new LengthFilter(Lucene.ANALYZER_VERSION, tokenStream, 0, Integer.MAX_VALUE);
            }
        }));

        tokenFilterFactories.put("common_grams", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "common_grams";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new CommonGramsFilter(Lucene.ANALYZER_VERSION, tokenStream, CharArraySet.EMPTY_SET);
            }
        }));

        tokenFilterFactories.put("lowercase", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "lowercase";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenStream);
            }
        }));

        tokenFilterFactories.put("kstem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "kstem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new KStemFilter(tokenStream);
            }
        }));

        tokenFilterFactories.put("porter_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "porter_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new PorterStemFilter(tokenStream);
            }
        }));

        tokenFilterFactories.put("standard", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "standard";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new StandardFilter(Lucene.ANALYZER_VERSION, tokenStream);
            }
        }));

        tokenFilterFactories.put("classic", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "classic";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new ClassicFilter(tokenStream);
            }
        }));

        tokenFilterFactories.put("nGram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "nGram";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
            }
        }));

        tokenFilterFactories.put("ngram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "ngram";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
            }
        }));

        tokenFilterFactories.put("edgeNGram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "edgeNGram";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
            }
        }));

        tokenFilterFactories.put("edge_ngram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "edge_ngram";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
            }
        }));

        tokenFilterFactories.put("shingle", new PreBuiltTokenFilterFactoryFactory(new ShingleTokenFilterFactory.Factory("shingle")));

        tokenFilterFactories.put("unique", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "unique";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new UniqueTokenFilter(tokenStream);
            }
        }));

        tokenFilterFactories.put("truncate", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "truncate";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new TruncateTokenFilter(tokenStream, 10);
            }
        }));

        // Extended Token Filters
        tokenFilterFactories.put("snowball", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "snowball";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new SnowballFilter(tokenStream, "English");
            }
        }));
        tokenFilterFactories.put("stemmer", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "stemmer";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new PorterStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("elision", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "elision";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                // LUCENE 4 UPGRADE: French default for now, make set of articles configurable
                return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
            }
        }));
        tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "arabic_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new ArabicStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("brazilian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "brazilian_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new BrazilianStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("czech_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "czech_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new CzechStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("dutch_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "dutch_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new DutchStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("french_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "french_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new FrenchStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("german_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "german_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new GermanStemFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("russian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "russian_stem";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new SnowballFilter(tokenStream, "Russian");
            }
        }));
        tokenFilterFactories.put("keyword_repeat", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "keyword_repeat";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new KeywordRepeatFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("arabic_normalization", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "arabic_normalization";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new ArabicNormalizationFilter(tokenStream);
            }
        }));
        tokenFilterFactories.put("persian_normalization", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
            @Override
            public String name() {
                return "persian_normalization";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new PersianNormalizationFilter(tokenStream);
            }
        }));

        tokenFilterFactories.put("type_as_payload", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {

            @Override
            public String name() {
                return "type_as_payload";
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new TypeAsPayloadTokenFilter(tokenStream);
            }
        }));

        // Char Filter
        charFilterFactories.put("html_strip", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
            @Override
            public String name() {
                return "html_strip";
            }

            @Override
            public Reader create(Reader tokenStream) {
                return new HTMLStripCharFilter(tokenStream);
            }
        }));

        charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
            @Override
            public String name() {
                return "htmlStrip";
            }

            @Override
            public Reader create(Reader tokenStream) {
                return new HTMLStripCharFilter(tokenStream);
            }
        }));
        // Tokenizers
        for (PreBuiltTokenizers preBuiltTokenizer : PreBuiltTokenizers.values()) {
            String name = preBuiltTokenizer.name().toLowerCase(Locale.ROOT);
            tokenizerFactories.put(name, new PreBuiltTokenizerFactoryFactory(preBuiltTokenizer.getTokenizerFactory(Version.CURRENT)));
        }

        // Tokenizer aliases
        tokenizerFactories.put("nGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.NGRAM.getTokenizerFactory(Version.CURRENT)));
        tokenizerFactories.put("edgeNGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.EDGE_NGRAM.getTokenizerFactory(Version.CURRENT)));


        // Token filters
        for (PreBuiltTokenFilters preBuiltTokenFilter : PreBuiltTokenFilters.values()) {
            String name = preBuiltTokenFilter.name().toLowerCase(Locale.ROOT);
            tokenFilterFactories.put(name, new PreBuiltTokenFilterFactoryFactory(preBuiltTokenFilter.getTokenFilterFactory(Version.CURRENT)));
        }
        // Token filter aliases
        tokenFilterFactories.put("nGram", new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.NGRAM.getTokenFilterFactory(Version.CURRENT)));
        tokenFilterFactories.put("edgeNGram", new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.EDGE_NGRAM.getTokenFilterFactory(Version.CURRENT)));


        // Char Filters
        for (PreBuiltCharFilters preBuiltCharFilter : PreBuiltCharFilters.values()) {
            String name = preBuiltCharFilter.name().toLowerCase(Locale.ROOT);
            charFilterFactories.put(name, new PreBuiltCharFilterFactoryFactory(preBuiltCharFilter.getCharFilterFactory(Version.CURRENT)));
        }
        // Char filter aliases
        charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT)));
    }

    public boolean hasCharFilter(String name) {
@@ -18,8 +18,6 @@
 */
package org.elasticsearch.indices.analysis;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;

@@ -62,12 +60,10 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;

import java.util.Map;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

/**
 *

@@ -373,71 +369,30 @@ public enum PreBuiltAnalyzers {
        }
    };

    /**
     * The strategy of caching the analyzer
     *
     * ONE           Exactly one version is stored. Useful for analyzers which do not store version information
     * LUCENE        Exactly one version for each lucene version is stored. Useful to prevent different analyzers with the same version
     * ELASTICSEARCH Exactly one version per elasticsearch version is stored. Useful if you change an analyzer between elasticsearch releases, when the lucene version does not change
     */
    private static enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };

    private CachingStrategy cachingStrategy;
    protected final Map<Version, Analyzer> cachedAnalyzers = Maps.newHashMapWithExpectedSize(2);

    PreBuiltAnalyzers() {
        this(CachingStrategy.LUCENE);
    }

    PreBuiltAnalyzers(CachingStrategy cachingStrategy) {
        this.cachingStrategy = cachingStrategy;
    }

    abstract protected Analyzer create(Version version);

    public Map<Version, Analyzer> getCachedAnalyzers() {
        return ImmutableMap.copyOf(cachedAnalyzers);
    protected final PreBuiltCacheFactory.PreBuiltCache<Analyzer> cache;

    PreBuiltAnalyzers() {
        this(PreBuiltCacheFactory.CachingStrategy.LUCENE);
    }

    PreBuiltAnalyzers(PreBuiltCacheFactory.CachingStrategy cachingStrategy) {
        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
    }

    PreBuiltCacheFactory.PreBuiltCache<Analyzer> getCache() {
        return cache;
    }

    public synchronized Analyzer getAnalyzer(Version version) {
        Analyzer analyzer = getCachedAnalyzer(version);
        Analyzer analyzer = cache.get(version);
        if (analyzer == null) {
            analyzer = this.create(version);
        }

        if (!cachedAnalyzers.containsKey(version)) {
            cachedAnalyzers.put(version, analyzer);
            cache.put(version, analyzer);
        }

        return analyzer;
    }

    private Analyzer getCachedAnalyzer(Version version) {
        switch (this.cachingStrategy) {
            case ONE:
                // method to return the first found analyzer in the cache
                if (cachedAnalyzers.size() > 0) {
                    return (Analyzer) cachedAnalyzers.values().toArray()[0];
                }
                break;
            case LUCENE:
                // find already cached analyzers with the same lucene version
                for (Version elasticsearchVersion : cachedAnalyzers.keySet()) {
                    if (elasticsearchVersion.luceneVersion.equals(version.luceneVersion)) {
                        return cachedAnalyzers.get(elasticsearchVersion);
                    }
                }
                break;
            case ELASTICSEARCH:
                // check only for the same es version
                if (cachedAnalyzers.containsKey(version)) {
                    return cachedAnalyzers.get(version);
                }
                break;
            default:
                throw new ElasticSearchException("No action configured for caching strategy[" + this.cachingStrategy + "]");
        }

        return null;
    }
}
@@ -0,0 +1,114 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.indices.analysis;

import com.google.common.collect.Maps;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.Version;

import java.util.Map;

/**
 *
 */
public class PreBuiltCacheFactory {

    /**
     * The strategy of caching the analyzer
     *
     * ONE           Exactly one version is stored. Useful for analyzers which do not store version information
     * LUCENE        Exactly one version for each lucene version is stored. Useful to prevent different analyzers with the same version
     * ELASTICSEARCH Exactly one version per elasticsearch version is stored. Useful if you change an analyzer between elasticsearch releases, when the lucene version does not change
     */
    static enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };

    public interface PreBuiltCache<T> {
        T get(Version version);
        void put(Version version, T t);
    }

    private PreBuiltCacheFactory() {}

    static <T> PreBuiltCache<T> getCache(CachingStrategy cachingStrategy) {
        switch (cachingStrategy) {
            case ONE:
                return new PreBuiltCacheStrategyOne<T>();
            case LUCENE:
                return new PreBuiltCacheStrategyLucene<T>();
            case ELASTICSEARCH:
                return new PreBuiltCacheStrategyElasticsearch<T>();
            default:
                throw new ElasticSearchException("No action configured for caching strategy[" + cachingStrategy + "]");
        }
    }

    /**
     * This is a pretty simple cache, it only contains one version
     */
    private static class PreBuiltCacheStrategyOne<T> implements PreBuiltCache<T> {

        private T model = null;

        @Override
        public T get(Version version) {
            return model;
        }

        @Override
        public void put(Version version, T model) {
            this.model = model;
        }
    }

    /**
     * This cache contains one version for each elasticsearch version object
     */
    private static class PreBuiltCacheStrategyElasticsearch<T> implements PreBuiltCache<T> {

        Map<Version, T> mapModel = Maps.newHashMapWithExpectedSize(2);

        @Override
        public T get(Version version) {
            return mapModel.get(version);
        }

        @Override
        public void put(Version version, T model) {
            mapModel.put(version, model);
        }
    }

    /**
     * This cache uses the lucene version for caching
     */
    private static class PreBuiltCacheStrategyLucene<T> implements PreBuiltCache<T> {

        private Map<org.apache.lucene.util.Version, T> mapModel = Maps.newHashMapWithExpectedSize(2);

        @Override
        public T get(Version version) {
            return mapModel.get(version.luceneVersion);
        }

        @Override
        public void put(org.elasticsearch.Version version, T model) {
            mapModel.put(version.luceneVersion, model);
        }
    }
}
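The three strategies differ only in the cache key: ONE ignores the version entirely, LUCENE keys on version.luceneVersion, and ELASTICSEARCH keys on the full elasticsearch version. A minimal sketch of the LUCENE behavior, assuming same-package access (getCache and CachingStrategy are package-private above) and that V_0_90_1 and V_0_90_2 ship the same lucene version, as the tests further down rely on:

    // Sketch only: String stands in for a real factory type.
    PreBuiltCacheFactory.PreBuiltCache<String> cache = PreBuiltCacheFactory.getCache(CachingStrategy.LUCENE);
    cache.put(Version.V_0_90_1, "factory-a");
    // Keyed by version.luceneVersion, so a different es release on the same lucene version hits the same entry.
    String cached = cache.get(Version.V_0_90_2); // "factory-a"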
@@ -0,0 +1,70 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.indices.analysis;

import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.io.Reader;
import java.util.Locale;

/**
 *
 */
public enum PreBuiltCharFilters {

    HTML_STRIP(CachingStrategy.ONE) {
        @Override
        public Reader create(Reader tokenStream, Version version) {
            return new HTMLStripCharFilter(tokenStream);
        }
    };

    abstract public Reader create(Reader tokenStream, Version version);

    protected final PreBuiltCacheFactory.PreBuiltCache<CharFilterFactory> cache;

    PreBuiltCharFilters(CachingStrategy cachingStrategy) {
        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
    }

    public synchronized CharFilterFactory getCharFilterFactory(final Version version) {
        CharFilterFactory charFilterFactory = cache.get(version);
        if (charFilterFactory == null) {
            final String finalName = name();

            charFilterFactory = new CharFilterFactory() {
                @Override
                public String name() {
                    return finalName.toLowerCase(Locale.ROOT);
                }

                @Override
                public Reader create(Reader tokenStream) {
                    return valueOf(finalName).create(tokenStream, version);
                }
            };
            cache.put(version, charFilterFactory);
        }

        return charFilterFactory;
    }
}
@@ -0,0 +1,312 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.indices.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.miscellaneous.*;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.util.Locale;

/**
 *
 */
public enum PreBuiltTokenFilters {

    WORD_DELIMITER(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new WordDelimiterFilter(tokenStream,
                    WordDelimiterFilter.GENERATE_WORD_PARTS |
                    WordDelimiterFilter.GENERATE_NUMBER_PARTS |
                    WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
                    WordDelimiterFilter.SPLIT_ON_NUMERICS |
                    WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
        }
    },

    STOP(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new StopFilter(version.luceneVersion, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
    },

    TRIM(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new TrimFilter(version.luceneVersion, tokenStream);
        }
    },

    REVERSE(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ReverseStringFilter(version.luceneVersion, tokenStream);
        }
    },

    ASCIIFOLDING(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ASCIIFoldingFilter(tokenStream);
        }
    },

    LENGTH(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new LengthFilter(version.luceneVersion, tokenStream, 0, Integer.MAX_VALUE);
        }
    },

    COMMON_GRAMS(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new CommonGramsFilter(version.luceneVersion, tokenStream, CharArraySet.EMPTY_SET);
        }
    },

    LOWERCASE(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new LowerCaseFilter(version.luceneVersion, tokenStream);
        }
    },

    KSTEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new KStemFilter(tokenStream);
        }
    },

    PORTER_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new PorterStemFilter(tokenStream);
        }
    },

    STANDARD(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new StandardFilter(version.luceneVersion, tokenStream);
        }
    },

    CLASSIC(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ClassicFilter(tokenStream);
        }
    },

    NGRAM(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new NGramTokenFilter(version.luceneVersion, tokenStream);
        }
    },

    EDGE_NGRAM(CachingStrategy.LUCENE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new EdgeNGramTokenFilter(version.luceneVersion, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
        }
    },

    UNIQUE(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new UniqueTokenFilter(tokenStream);
        }
    },

    TRUNCATE(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new TruncateTokenFilter(tokenStream, 10);
        }
    },

    // Extended Token Filters
    SNOWBALL(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new SnowballFilter(tokenStream, "English");
        }
    },

    STEMMER(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new PorterStemFilter(tokenStream);
        }
    },

    ELISION(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
        }
    },

    ARABIC_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ArabicStemFilter(tokenStream);
        }
    },

    BRAZILIAN_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new BrazilianStemFilter(tokenStream);
        }
    },

    CZECH_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new CzechStemFilter(tokenStream);
        }
    },

    DUTCH_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new DutchStemFilter(tokenStream);
        }
    },

    FRENCH_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new FrenchStemFilter(tokenStream);
        }
    },

    GERMAN_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new GermanStemFilter(tokenStream);
        }
    },

    RUSSIAN_STEM(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new SnowballFilter(tokenStream, "Russian");
        }
    },

    KEYWORD_REPEAT(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new KeywordRepeatFilter(tokenStream);
        }
    },

    ARABIC_NORMALIZATION(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ArabicNormalizationFilter(tokenStream);
        }
    },

    PERSIAN_NORMALIZATION(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new PersianNormalizationFilter(tokenStream);
        }
    },

    TYPE_AS_PAYLOAD(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new TypeAsPayloadTokenFilter(tokenStream);
        }
    },

    SHINGLE(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new ShingleFilter(tokenStream);
        }
    };

    abstract public TokenStream create(TokenStream tokenStream, Version version);

    protected final PreBuiltCacheFactory.PreBuiltCache<TokenFilterFactory> cache;


    PreBuiltTokenFilters(CachingStrategy cachingStrategy) {
        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
    }

    public synchronized TokenFilterFactory getTokenFilterFactory(final Version version) {
        TokenFilterFactory factory = cache.get(version);
        if (factory == null) {
            final String finalName = name();
            factory = new TokenFilterFactory() {
                @Override
                public String name() {
                    return finalName.toLowerCase(Locale.ROOT);
                }

                @Override
                public TokenStream create(TokenStream tokenStream) {
                    return valueOf(finalName).create(tokenStream, version);
                }
            };
            cache.put(version, factory);
        }

        return factory;
    }

}
@@ -0,0 +1,154 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.indices.analysis;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.io.Reader;
import java.util.Locale;

/**
 *
 */
public enum PreBuiltTokenizers {

    STANDARD(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new StandardTokenizer(version.luceneVersion, reader);
        }
    },

    CLASSIC(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new ClassicTokenizer(version.luceneVersion, reader);
        }
    },

    UAX_URL_EMAIL(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new UAX29URLEmailTokenizer(version.luceneVersion, reader);
        }
    },

    PATH_HIERARCHY(CachingStrategy.ONE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new PathHierarchyTokenizer(reader);
        }
    },

    KEYWORD(CachingStrategy.ONE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new KeywordTokenizer(reader);
        }
    },

    LETTER(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new LetterTokenizer(version.luceneVersion, reader);
        }
    },

    LOWERCASE(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new LowerCaseTokenizer(version.luceneVersion, reader);
        }
    },

    WHITESPACE(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new WhitespaceTokenizer(version.luceneVersion, reader);
        }
    },

    NGRAM(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new NGramTokenizer(version.luceneVersion, reader);
        }
    },

    EDGE_NGRAM(CachingStrategy.LUCENE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new EdgeNGramTokenizer(version.luceneVersion, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
        }
    },

    PATTERN(CachingStrategy.ONE) {
        @Override
        protected Tokenizer create(Reader reader, Version version) {
            return new PatternTokenizer(reader, Regex.compile("\\W+", null), -1);
        }
    };

    abstract protected Tokenizer create(Reader reader, Version version);

    protected final PreBuiltCacheFactory.PreBuiltCache<TokenizerFactory> cache;


    PreBuiltTokenizers(CachingStrategy cachingStrategy) {
        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
    }

    public synchronized TokenizerFactory getTokenizerFactory(final Version version) {
        TokenizerFactory tokenizerFactory = cache.get(version);
        if (tokenizerFactory == null) {
            final String finalName = name();

            tokenizerFactory = new TokenizerFactory() {
                @Override
                public String name() {
                    return finalName.toLowerCase(Locale.ROOT);
                }

                @Override
                public Tokenizer create(Reader reader) {
                    return valueOf(finalName).create(reader, version);
                }
            };
            cache.put(version, tokenizerFactory);
        }

        return tokenizerFactory;
    }

}
@@ -0,0 +1,49 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
import org.junit.Test;

import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.MatcherAssert.assertThat;

/**
 *
 */
public class PreBuiltCharFilterFactoryFactoryTests {

    @Test
    public void testThatDifferentVersionsCanBeLoaded() {
        PreBuiltCharFilterFactoryFactory factory = new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT));

        CharFilterFactory emptySettingsTokenizerFactory = factory.create("html_strip", ImmutableSettings.EMPTY);
        CharFilterFactory former090TokenizerFactory = factory.create("html_strip", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0).build());
        CharFilterFactory former090TokenizerFactoryCopy = factory.create("html_strip", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0).build());
        CharFilterFactory currentTokenizerFactory = factory.create("html_strip", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());

        assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
        assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactory));
        assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactoryCopy));
    }

}
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PreBuiltTokenFilterFactoryFactoryTests extends ElasticsearchTestCase {
|
||||
|
||||
@Test
|
||||
public void testThatCachingWorksForCachingStrategyOne() {
|
||||
PreBuiltTokenFilterFactoryFactory factory = new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.WORD_DELIMITER.getTokenFilterFactory(Version.CURRENT));
|
||||
|
||||
TokenFilterFactory emptySettingsTokenizerFactory = factory.create("word_delimiter", ImmutableSettings.EMPTY);
|
||||
TokenFilterFactory former090TokenizerFactory = factory.create("word_delimiter", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_1).build());
|
||||
TokenFilterFactory former090TokenizerFactoryCopy = factory.create("word_delimiter", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_2).build());
|
||||
TokenFilterFactory currentTokenizerFactory = factory.create("word_delimiter", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
|
||||
|
||||
assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
|
||||
assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactory));
|
||||
assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactoryCopy));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatDifferentVersionsCanBeLoaded() {
|
||||
PreBuiltTokenFilterFactoryFactory factory = new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.STOP.getTokenFilterFactory(Version.CURRENT));
|
||||
|
||||
TokenFilterFactory emptySettingsTokenizerFactory = factory.create("stop", ImmutableSettings.EMPTY);
|
||||
TokenFilterFactory former090TokenizerFactory = factory.create("stop", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_1).build());
|
||||
TokenFilterFactory former090TokenizerFactoryCopy = factory.create("stop", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_2).build());
|
||||
TokenFilterFactory currentTokenizerFactory = factory.create("stop", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
|
||||
|
||||
assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
|
||||
assertThat(emptySettingsTokenizerFactory, is(not(former090TokenizerFactory)));
|
||||
assertThat(former090TokenizerFactory, is(former090TokenizerFactoryCopy));
|
||||
}
|
||||
|
||||
}
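Taken together, the two tests pin down the cache granularity: WORD_DELIMITER is built once and shared across every version (the "caching strategy one" of the first test's name), while STOP is cached per version bucket, so 0.90.1 and 0.90.2 resolve to one instance and CURRENT to another. A sketch of how a strategy enum could pick the cache key follows; ONE is taken from the test name, but LUCENE and ELASTICSEARCH are assumed names, and the luceneVersion field is an assumption about org.elasticsearch.Version:

// Hypothetical sketch only, not the commit's actual code.
class CacheKeys {

    enum CachingStrategy {
        ONE,            // one shared instance, whatever the version
        LUCENE,         // one instance per Lucene version
        ELASTICSEARCH   // one instance per Elasticsearch version
    }

    static Object cacheKey(CachingStrategy strategy, org.elasticsearch.Version version) {
        switch (strategy) {
            case ONE:
                return CachingStrategy.ONE;   // constant key: all versions collapse to one entry
            case LUCENE:
                return version.luceneVersion; // 0.90.1 and 0.90.2 share a Lucene version, hence one entry
            default:
                return version;               // a distinct entry for every Elasticsearch version
        }
    }
}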

@@ -0,0 +1,51 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;

import static org.hamcrest.CoreMatchers.*;

/**
 *
 */
public class PreBuiltTokenizerFactoryFactoryTests extends ElasticsearchTestCase {

    @Test
    public void testThatDifferentVersionsCanBeLoaded() {
        PreBuiltTokenizerFactoryFactory factory = new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.STANDARD.getTokenizerFactory(Version.CURRENT));

        TokenizerFactory emptySettingsTokenizerFactory = factory.create("standard", ImmutableSettings.EMPTY);
        // different es versions, same lucene version, thus cached
        TokenizerFactory former090TokenizerFactory = factory.create("standard", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_1).build());
        TokenizerFactory former090TokenizerFactoryCopy = factory.create("standard", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_2).build());
        TokenizerFactory currentTokenizerFactory = factory.create("standard", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());

        assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
        assertThat(emptySettingsTokenizerFactory, is(not(former090TokenizerFactory)));
        assertThat(emptySettingsTokenizerFactory, is(not(former090TokenizerFactoryCopy)));
        assertThat(former090TokenizerFactory, is(former090TokenizerFactoryCopy));
    }

}
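The inline comment above ("different es versions, same lucene version, thus cached") states the crux: the equality of the two 0.90.x factories falls out of both releases shipping the same Lucene version, while CURRENT ships a newer one. If it were added to this test class, a sanity check for that relationship might read as follows (a hypothetical test assuming the public luceneVersion field on org.elasticsearch.Version):

    @Test
    public void testLuceneVersionsBehindCaching() {
        // hypothetical sanity check, not part of the commit: both 0.90.x
        // releases map to the same Lucene version...
        assertThat(Version.V_0_90_1.luceneVersion, is(Version.V_0_90_2.luceneVersion));
        // ...while the current release maps to a newer one
        assertThat(Version.V_0_90_1.luceneVersion, is(not(Version.CURRENT.luceneVersion)));
    }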

@@ -16,7 +16,7 @@
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.analysis;
package org.elasticsearch.indices.analysis;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

@@ -26,7 +26,6 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;

@@ -36,7 +35,8 @@ import java.util.Locale;
import java.util.Map;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.*;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;

/**
 *

@@ -105,36 +105,35 @@ public class PreBuiltAnalyzerIntegrationTests extends ElasticsearchIntegrationTest
        assertThatAnalyzersHaveBeenLoaded(loadedAnalyzers);

        // check that all of the prebuiltanalyzers are still open
        for (PreBuiltAnalyzers preBuiltAnalyzer : PreBuiltAnalyzers.values()) {
            assertLuceneAnalyzerIsNotClosed(preBuiltAnalyzer);
        }
        assertLuceneAnalyzersAreNotClosed(loadedAnalyzers);
    }

    private void assertThatAnalyzersHaveBeenLoaded(Map<PreBuiltAnalyzers, List<Version>> expectedLoadedAnalyzers) {
        for (Map.Entry<PreBuiltAnalyzers, List<Version>> entry : expectedLoadedAnalyzers.entrySet()) {
            Map<Version, Analyzer> cachedAnalyzers = entry.getKey().getCachedAnalyzers();
            assertThat(cachedAnalyzers.keySet(), hasItems(entry.getValue().toArray(new Version[]{})));
            /*for (Version expectedVersion : entry.getValue()) {
                assertThat(cachedAnalyzers, contains(ex))
            for (Version version : entry.getValue()) {
                // if it is not null in the cache, it has been loaded
                assertThat(entry.getKey().getCache().get(version), is(notNullValue()));
            }
            */
        }
    }

    // the close() method of a lucene analyzer sets the storedValue field to null
    // we simply check this via reflection - ugly but works
    private void assertLuceneAnalyzerIsNotClosed(PreBuiltAnalyzers preBuiltAnalyzer) throws IllegalAccessException, NoSuchFieldException {
    private void assertLuceneAnalyzersAreNotClosed(Map<PreBuiltAnalyzers, List<Version>> loadedAnalyzers) throws IllegalAccessException, NoSuchFieldException {
        for (Map.Entry<PreBuiltAnalyzers, List<Version>> preBuiltAnalyzerEntry : loadedAnalyzers.entrySet()) {
            PreBuiltAnalyzers preBuiltAnalyzer = preBuiltAnalyzerEntry.getKey();
            for (Version version : preBuiltAnalyzerEntry.getValue()) {
                Analyzer analyzer = preBuiltAnalyzerEntry.getKey().getCache().get(version);

                for (Map.Entry<Version, Analyzer> luceneAnalyzerEntry : preBuiltAnalyzer.getCachedAnalyzers().entrySet()) {
                Field field = getFieldFromClass("storedValue", luceneAnalyzerEntry.getValue());
                boolean currentAccessible = field.isAccessible();
                field.setAccessible(true);
                Object storedValue = field.get(preBuiltAnalyzer.getAnalyzer(luceneAnalyzerEntry.getKey()));
                field.setAccessible(currentAccessible);
                Field field = getFieldFromClass("storedValue", analyzer);
                boolean currentAccessible = field.isAccessible();
                field.setAccessible(true);
                Object storedValue = field.get(analyzer);
                field.setAccessible(currentAccessible);

                assertThat(String.format(Locale.ROOT, "Analyzer %s in version %s seems to be closed", preBuiltAnalyzer.name(), luceneAnalyzerEntry.getKey()), storedValue, is(notNullValue()));
                assertThat(String.format(Locale.ROOT, "Analyzer %s in version %s seems to be closed", preBuiltAnalyzer.name(), version), storedValue, is(notNullValue()));
            }
        }

    }
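    // The getFieldFromClass helper used above is not shown in this excerpt. A
    // plausible implementation (an assumption, not necessarily the commit's exact
    // code) walks up the class hierarchy, since storedValue is declared on a
    // Lucene base class rather than on the concrete analyzer class; it relies on
    // java.lang.reflect.Field, which the visible code already uses:
    private static Field getFieldFromClass(String fieldName, Object obj) {
        for (Class<?> clazz = obj.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
            try {
                return clazz.getDeclaredField(fieldName); // found at this level of the hierarchy
            } catch (NoSuchFieldException e) {
                // not declared here, keep walking towards java.lang.Object
            }
        }
        throw new IllegalArgumentException("No field named " + fieldName + " in hierarchy of " + obj.getClass());
    }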

/**