Refactoring IndicesAnalysisService

Using enums where possible in order to clean up the code in IndicesAnalysisService

Also introduced a simpler generic caching mechanism, and tests.
Alexander Reelsen 2013-11-18 16:00:01 +01:00
parent adb83182a5
commit 8257370415
13 changed files with 906 additions and 643 deletions
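For orientation: the diffs below replace the hand-written anonymous factories in IndicesAnalysisService with enum constants (PreBuiltAnalyzers, PreBuiltTokenizers, PreBuiltTokenFilters, PreBuiltCharFilters) that build and cache their factories per version. A minimal sketch of the new registration pattern follows; the standalone class, its main method and the plain HashMap are illustrative only and not part of the commit.

package org.elasticsearch.indices.analysis;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.TokenizerFactory;

public class PreBuiltRegistrationSketch {
    public static void main(String[] args) {
        // One loop per enum replaces one anonymous factory per name.
        Map<String, TokenizerFactory> tokenizers = new HashMap<String, TokenizerFactory>();
        for (PreBuiltTokenizers preBuiltTokenizer : PreBuiltTokenizers.values()) {
            String name = preBuiltTokenizer.name().toLowerCase(Locale.ROOT);
            tokenizers.put(name, preBuiltTokenizer.getTokenizerFactory(Version.CURRENT));
        }
        // camelCase aliases stay explicit, as in the new IndicesAnalysisService constructor.
        tokenizers.put("nGram", PreBuiltTokenizers.NGRAM.getTokenizerFactory(Version.CURRENT));
        tokenizers.put("edgeNGram", PreBuiltTokenizers.EDGE_NGRAM.getTokenizerFactory(Version.CURRENT));
        System.out.println(tokenizers.keySet());
    }
}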

PreBuiltCharFilterFactoryFactory.java

@@ -19,7 +19,12 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;

import java.util.Locale;

public class PreBuiltCharFilterFactoryFactory implements CharFilterFactoryFactory {

@@ -31,6 +36,11 @@ public class PreBuiltCharFilterFactoryFactory implements CharFilterFactoryFactory {
    @Override
    public CharFilterFactory create(String name, Settings settings) {
        Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
        if (!Version.CURRENT.equals(indexVersion)) {
            return PreBuiltCharFilters.valueOf(name.toUpperCase(Locale.ROOT)).getCharFilterFactory(indexVersion);
        }

        return charFilterFactory;
    }
}

PreBuiltTokenFilterFactoryFactory.java

@@ -19,7 +19,12 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;

import java.util.Locale;

public class PreBuiltTokenFilterFactoryFactory implements TokenFilterFactoryFactory {

@@ -31,6 +36,10 @@ public class PreBuiltTokenFilterFactoryFactory implements TokenFilterFactoryFactory {
    @Override
    public TokenFilterFactory create(String name, Settings settings) {
        Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
        if (!Version.CURRENT.equals(indexVersion)) {
            return PreBuiltTokenFilters.valueOf(name.toUpperCase(Locale.ROOT)).getTokenFilterFactory(indexVersion);
        }

        return tokenFilterFactory;
    }
}

PreBuiltTokenizerFactoryFactory.java

@@ -19,7 +19,12 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;

import java.util.Locale;

public class PreBuiltTokenizerFactoryFactory implements TokenizerFactoryFactory {

@@ -31,6 +36,12 @@ public class PreBuiltTokenizerFactoryFactory implements TokenizerFactoryFactory {
    @Override
    public TokenizerFactory create(String name, Settings settings) {
        Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
        if (!Version.CURRENT.equals(indexVersion)) {
            TokenizerFactory versionedTokenizerFactory = PreBuiltTokenizers.valueOf(name.toUpperCase(Locale.ROOT)).getTokenizerFactory(indexVersion);
            return versionedTokenizerFactory;
        }

        return tokenizerFactory;
    }
}
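All three factory-factory classes above follow the same pattern: when the index carries an older IndexMetaData.SETTING_VERSION_CREATED, create() looks up a factory built for that version from the corresponding enum instead of returning the cached current-version instance. A hedged sketch of exercising that path, modelled on the tests added later in this commit; the wrapper class and main method are illustrative.

package org.elasticsearch.index.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;

public class VersionedFactoryLookupSketch {
    public static void main(String[] args) {
        PreBuiltTokenizerFactoryFactory factory =
                new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.STANDARD.getTokenizerFactory(Version.CURRENT));

        // No index version in the settings: defaults to Version.CURRENT, the wrapped factory is returned.
        TokenizerFactory current = factory.create("standard", ImmutableSettings.EMPTY);

        // An index created by an older version triggers the enum lookup by upper-cased name.
        Settings oldIndex = ImmutableSettings.settingsBuilder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0)
                .build();
        TokenizerFactory versioned = factory.create("standard", oldIndex);

        // Whether both are the same instance depends on whether 0.90.0 and CURRENT share a
        // Lucene version (see the LUCENE caching strategy and the tests in this commit).
        System.out.println(current == versioned);
    }
}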

IndicesAnalysisService.java

@@ -82,574 +82,40 @@ public class IndicesAnalysisService extends AbstractComponent {
    public IndicesAnalysisService(Settings settings) {
        super(settings);

        // Analyzers
        for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
            String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
            analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
        }
        // Tokenizers
        for (PreBuiltTokenizers preBuiltTokenizer : PreBuiltTokenizers.values()) {
            String name = preBuiltTokenizer.name().toLowerCase(Locale.ROOT);
            tokenizerFactories.put(name, new PreBuiltTokenizerFactoryFactory(preBuiltTokenizer.getTokenizerFactory(Version.CURRENT)));
        }
        // Tokenizer aliases
        tokenizerFactories.put("nGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.NGRAM.getTokenizerFactory(Version.CURRENT)));
        tokenizerFactories.put("edgeNGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.EDGE_NGRAM.getTokenizerFactory(Version.CURRENT)));

        // Token filters
        for (PreBuiltTokenFilters preBuiltTokenFilter : PreBuiltTokenFilters.values()) {
            String name = preBuiltTokenFilter.name().toLowerCase(Locale.ROOT);
            tokenFilterFactories.put(name, new PreBuiltTokenFilterFactoryFactory(preBuiltTokenFilter.getTokenFilterFactory(Version.CURRENT)));
        }
        // Token filter aliases
        tokenFilterFactories.put("nGram", new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.NGRAM.getTokenFilterFactory(Version.CURRENT)));
        tokenFilterFactories.put("edgeNGram", new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.EDGE_NGRAM.getTokenFilterFactory(Version.CURRENT)));

        // Char Filters
        for (PreBuiltCharFilters preBuiltCharFilter : PreBuiltCharFilters.values()) {
            String name = preBuiltCharFilter.name().toLowerCase(Locale.ROOT);
            charFilterFactories.put(name, new PreBuiltCharFilterFactoryFactory(preBuiltCharFilter.getCharFilterFactory(Version.CURRENT)));
        }
        // Char filter aliases
        charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT)));

// Removed in this commit: the hand-written anonymous factories that the loops above replace
// Base Tokenizers
tokenizerFactories.put("standard", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "standard";
}
@Override
public Tokenizer create(Reader reader) {
return new StandardTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("classic", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "classic";
}
@Override
public Tokenizer create(Reader reader) {
return new ClassicTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("uax_url_email", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "uax_url_email";
}
@Override
public Tokenizer create(Reader reader) {
return new UAX29URLEmailTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("path_hierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "path_hierarchy";
}
@Override
public Tokenizer create(Reader reader) {
return new PathHierarchyTokenizer(reader);
}
}));
tokenizerFactories.put("keyword", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "keyword";
}
@Override
public Tokenizer create(Reader reader) {
return new KeywordTokenizer(reader);
}
}));
tokenizerFactories.put("letter", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "letter";
}
@Override
public Tokenizer create(Reader reader) {
return new LetterTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("lowercase", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "lowercase";
}
@Override
public Tokenizer create(Reader reader) {
return new LowerCaseTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("whitespace", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "whitespace";
}
@Override
public Tokenizer create(Reader reader) {
return new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("nGram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "nGram";
}
@Override
public Tokenizer create(Reader reader) {
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("ngram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "ngram";
}
@Override
public Tokenizer create(Reader reader) {
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
tokenizerFactories.put("edgeNGram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "edgeNGram";
}
@Override
public Tokenizer create(Reader reader) {
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
}
}));
tokenizerFactories.put("edge_ngram", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "edge_ngram";
}
@Override
public Tokenizer create(Reader reader) {
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
}
}));
tokenizerFactories.put("pattern", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@Override
public String name() {
return "pattern";
}
@Override
public Tokenizer create(Reader reader) {
return new PatternTokenizer(reader, Regex.compile("\\W+", null), -1);
}
}));
// Token Filters
tokenFilterFactories.put("word_delimiter", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "word_delimiter";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new WordDelimiterFilter(tokenStream,
WordDelimiterFilter.GENERATE_WORD_PARTS |
WordDelimiterFilter.GENERATE_NUMBER_PARTS |
WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
WordDelimiterFilter.SPLIT_ON_NUMERICS |
WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
}
}));
tokenFilterFactories.put("stop", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "stop";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new StopFilter(Lucene.ANALYZER_VERSION, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
}));
tokenFilterFactories.put("trim", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "trim";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new TrimFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
tokenFilterFactories.put("reverse", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "reverse";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new ReverseStringFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
tokenFilterFactories.put("asciifolding", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "asciifolding";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new ASCIIFoldingFilter(tokenStream);
}
}));
tokenFilterFactories.put("length", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "length";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new LengthFilter(Lucene.ANALYZER_VERSION, tokenStream, 0, Integer.MAX_VALUE);
}
}));
tokenFilterFactories.put("common_grams", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "common_grams";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new CommonGramsFilter(Lucene.ANALYZER_VERSION, tokenStream, CharArraySet.EMPTY_SET);
}
}));
tokenFilterFactories.put("lowercase", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "lowercase";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
tokenFilterFactories.put("kstem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "kstem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new KStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("porter_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "porter_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new PorterStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("standard", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "standard";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new StandardFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
tokenFilterFactories.put("classic", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "classic";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new ClassicFilter(tokenStream);
}
}));
tokenFilterFactories.put("nGram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "nGram";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
tokenFilterFactories.put("ngram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "ngram";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
tokenFilterFactories.put("edgeNGram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "edgeNGram";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}
}));
tokenFilterFactories.put("edge_ngram", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "edge_ngram";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}
}));
tokenFilterFactories.put("shingle", new PreBuiltTokenFilterFactoryFactory(new ShingleTokenFilterFactory.Factory("shingle")));
tokenFilterFactories.put("unique", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "unique";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new UniqueTokenFilter(tokenStream);
}
}));
tokenFilterFactories.put("truncate", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "truncate";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new TruncateTokenFilter(tokenStream, 10);
}
}));
// Extended Token Filters
tokenFilterFactories.put("snowball", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "snowball";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new SnowballFilter(tokenStream, "English");
}
}));
tokenFilterFactories.put("stemmer", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "stemmer";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new PorterStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("elision", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "elision";
}
@Override
public TokenStream create(TokenStream tokenStream) {
// LUCENE 4 UPGRADE: French default for now, make set of articles configurable
return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
}
}));
tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "arabic_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new ArabicStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("brazilian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "brazilian_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new BrazilianStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("czech_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "czech_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new CzechStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("dutch_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "dutch_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new DutchStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("french_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "french_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new FrenchStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("german_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "german_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new GermanStemFilter(tokenStream);
}
}));
tokenFilterFactories.put("russian_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "russian_stem";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new SnowballFilter(tokenStream, "Russian");
}
}));
tokenFilterFactories.put("keyword_repeat", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "keyword_repeat";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new KeywordRepeatFilter(tokenStream);
}
}));
tokenFilterFactories.put("arabic_normalization", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "arabic_normalization";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new ArabicNormalizationFilter(tokenStream);
}
}));
tokenFilterFactories.put("persian_normalization", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "persian_normalization";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new PersianNormalizationFilter(tokenStream);
}
}));
tokenFilterFactories.put("type_as_payload", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "type_as_payload";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new TypeAsPayloadTokenFilter(tokenStream);
}
}));
// Char Filter
charFilterFactories.put("html_strip", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
@Override
public String name() {
return "html_strip";
}
@Override
public Reader create(Reader tokenStream) {
return new HTMLStripCharFilter(tokenStream);
}
}));
charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
@Override
public String name() {
return "htmlStrip";
}
@Override
public Reader create(Reader tokenStream) {
return new HTMLStripCharFilter(tokenStream);
}
}));
    }

    public boolean hasCharFilter(String name) {

PreBuiltAnalyzers.java

@@ -18,8 +18,6 @@
 */
package org.elasticsearch.indices.analysis;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
@@ -62,12 +60,10 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.util.Map;

/**
 *
@@ -373,71 +369,30 @@ public enum PreBuiltAnalyzers {
    }
};
/**
* The strategy of caching the analyzer
*
* ONE Exactly one version is stored. Useful for analyzers which do not store version information
* LUCENE Exactly one version for each lucene version is stored. Useful to prevent different analyzers with the same version
* ELASTICSEARCH Exactly one version per elasticsearch version is stored. Useful if you change an analyzer between elasticsearch releases, when the lucene version does not change
*/
private static enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
private CachingStrategy cachingStrategy;
protected final Map<Version, Analyzer> cachedAnalyzers = Maps.newHashMapWithExpectedSize(2);
PreBuiltAnalyzers() {
this(CachingStrategy.LUCENE);
}
PreBuiltAnalyzers(CachingStrategy cachingStrategy) {
this.cachingStrategy = cachingStrategy;
}
abstract protected Analyzer create(Version version);

protected final PreBuiltCacheFactory.PreBuiltCache<Analyzer> cache;

PreBuiltAnalyzers() {
    this(PreBuiltCacheFactory.CachingStrategy.LUCENE);
}

PreBuiltAnalyzers(PreBuiltCacheFactory.CachingStrategy cachingStrategy) {
    cache = PreBuiltCacheFactory.getCache(cachingStrategy);
}

PreBuiltCacheFactory.PreBuiltCache<Analyzer> getCache() {
    return cache;
}

public synchronized Analyzer getAnalyzer(Version version) {
    Analyzer analyzer = cache.get(version);
    if (analyzer == null) {
        analyzer = this.create(version);
        cache.put(version, analyzer);
    }

    return analyzer;
}

// Replaced by the cache-based implementation above:
public Map<Version, Analyzer> getCachedAnalyzers() {
    return ImmutableMap.copyOf(cachedAnalyzers);
}

public synchronized Analyzer getAnalyzer(Version version) {
    Analyzer analyzer = getCachedAnalyzer(version);
    if (analyzer == null) {
        analyzer = this.create(version);
    }
    if (!cachedAnalyzers.containsKey(version)) {
        cachedAnalyzers.put(version, analyzer);
    }

    return analyzer;
}
private Analyzer getCachedAnalyzer(Version version) {
switch (this.cachingStrategy) {
case ONE:
// method to return the first found analyzer in the cache
if (cachedAnalyzers.size() > 0) {
return (Analyzer) cachedAnalyzers.values().toArray()[0];
}
break;
case LUCENE:
// find already cached analyzers with the same lucene version
for (Version elasticsearchVersion : cachedAnalyzers.keySet()) {
if (elasticsearchVersion.luceneVersion.equals(version.luceneVersion)) {
return cachedAnalyzers.get(elasticsearchVersion);
}
}
break;
case ELASTICSEARCH:
// check only for the same es version
if (cachedAnalyzers.containsKey(version)) {
return cachedAnalyzers.get(version);
}
break;
default:
throw new ElasticSearchException("No action configured for caching strategy[" + this.cachingStrategy + "]");
}
return null;
}
}

PreBuiltCacheFactory.java

@@ -0,0 +1,114 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import com.google.common.collect.Maps;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.Version;
import java.util.Map;
/**
*
*/
public class PreBuiltCacheFactory {
/**
* The strategy of caching the analyzer
*
* ONE Exactly one version is stored. Useful for analyzers which do not store version information
* LUCENE Exactly one version for each lucene version is stored. Useful to prevent different analyzers with the same version
* ELASTICSEARCH Exactly one version per elasticsearch version is stored. Useful if you change an analyzer between elasticsearch releases, when the lucene version does not change
*/
static enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
public interface PreBuiltCache<T> {
T get(Version version);
void put(Version version, T t);
}
private PreBuiltCacheFactory() {}
static <T> PreBuiltCache<T> getCache(CachingStrategy cachingStrategy) {
switch (cachingStrategy) {
case ONE:
return new PreBuiltCacheStrategyOne<T>();
case LUCENE:
return new PreBuiltCacheStrategyLucene<T>();
case ELASTICSEARCH:
return new PreBuiltCacheStrategyElasticsearch<T>();
default:
throw new ElasticSearchException("No action configured for caching strategy[" + cachingStrategy + "]");
}
}
/**
* This is a pretty simple cache, it only contains one version
*/
private static class PreBuiltCacheStrategyOne<T> implements PreBuiltCache<T> {
private T model = null;
@Override
public T get(Version version) {
return model;
}
@Override
public void put(Version version, T model) {
this.model = model;
}
}
/**
* This cache contains one version for each elasticsearch version object
*/
private static class PreBuiltCacheStrategyElasticsearch<T> implements PreBuiltCache<T> {
Map<Version, T> mapModel = Maps.newHashMapWithExpectedSize(2);
@Override
public T get(Version version) {
return mapModel.get(version);
}
@Override
public void put(Version version, T model) {
mapModel.put(version, model);
}
}
/**
* This cache uses the lucene version for caching
*/
private static class PreBuiltCacheStrategyLucene<T> implements PreBuiltCache<T> {
private Map<org.apache.lucene.util.Version, T> mapModel = Maps.newHashMapWithExpectedSize(2);
@Override
public T get(Version version) {
return mapModel.get(version.luceneVersion);
}
@Override
public void put(org.elasticsearch.Version version, T model) {
mapModel.put(version.luceneVersion, model);
}
}
}
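The three caching strategies differ only in the cache key: ONE ignores the version, ELASTICSEARCH keys on the full Elasticsearch Version, and LUCENE keys on version.luceneVersion so that releases sharing a Lucene version share one cached object. A small sketch of that behaviour follows; getCache and CachingStrategy are package-private, so this assumes a caller in org.elasticsearch.indices.analysis, and the class name and printed expectations are illustrative.

package org.elasticsearch.indices.analysis;

import org.elasticsearch.Version;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.PreBuiltCache;

public class CachingStrategySketch {
    public static void main(String[] args) {
        // ONE: a single slot, the version is ignored entirely.
        PreBuiltCache<String> one = PreBuiltCacheFactory.getCache(CachingStrategy.ONE);
        one.put(Version.V_0_90_0, "a");
        System.out.println(one.get(Version.CURRENT));     // "a"

        // ELASTICSEARCH: one entry per Elasticsearch version.
        PreBuiltCache<String> es = PreBuiltCacheFactory.getCache(CachingStrategy.ELASTICSEARCH);
        es.put(Version.V_0_90_0, "b");
        System.out.println(es.get(Version.CURRENT));      // null, different ES version

        // LUCENE: keyed by version.luceneVersion, so ES versions sharing a Lucene
        // release share the cached value (0.90.1 / 0.90.2 in the tests below).
        PreBuiltCache<String> lucene = PreBuiltCacheFactory.getCache(CachingStrategy.LUCENE);
        lucene.put(Version.V_0_90_1, "c");
        System.out.println(lucene.get(Version.V_0_90_2)); // "c" if both map to the same Lucene version
    }
}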

PreBuiltCharFilters.java

@@ -0,0 +1,70 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import java.io.Reader;
import java.util.Locale;
/**
*
*/
public enum PreBuiltCharFilters {
HTML_STRIP(CachingStrategy.ONE) {
@Override
public Reader create(Reader tokenStream, Version version) {
return new HTMLStripCharFilter(tokenStream);
}
};
abstract public Reader create(Reader tokenStream, Version version);
protected final PreBuiltCacheFactory.PreBuiltCache<CharFilterFactory> cache;
PreBuiltCharFilters(CachingStrategy cachingStrategy) {
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
}
public synchronized CharFilterFactory getCharFilterFactory(final Version version) {
CharFilterFactory charFilterFactory = cache.get(version);
if (charFilterFactory == null) {
final String finalName = name();
charFilterFactory = new CharFilterFactory() {
@Override
public String name() {
return finalName.toLowerCase(Locale.ROOT);
}
@Override
public Reader create(Reader tokenStream) {
return valueOf(finalName).create(tokenStream, version);
}
};
cache.put(version, charFilterFactory);
}
return charFilterFactory;
}
}

PreBuiltTokenFilters.java

@@ -0,0 +1,312 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.miscellaneous.*;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import java.util.Locale;
/**
*
*/
public enum PreBuiltTokenFilters {
WORD_DELIMITER(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new WordDelimiterFilter(tokenStream,
WordDelimiterFilter.GENERATE_WORD_PARTS |
WordDelimiterFilter.GENERATE_NUMBER_PARTS |
WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
WordDelimiterFilter.SPLIT_ON_NUMERICS |
WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
}
},
STOP(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new StopFilter(version.luceneVersion, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
},
TRIM(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new TrimFilter(version.luceneVersion, tokenStream);
}
},
REVERSE(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ReverseStringFilter(version.luceneVersion, tokenStream);
}
},
ASCIIFOLDING(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ASCIIFoldingFilter(tokenStream);
}
},
LENGTH(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new LengthFilter(version.luceneVersion, tokenStream, 0, Integer.MAX_VALUE);
}
},
COMMON_GRAMS(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new CommonGramsFilter(version.luceneVersion, tokenStream, CharArraySet.EMPTY_SET);
}
},
LOWERCASE(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new LowerCaseFilter(version.luceneVersion, tokenStream);
}
},
KSTEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new KStemFilter(tokenStream);
}
},
PORTER_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new PorterStemFilter(tokenStream);
}
},
STANDARD(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new StandardFilter(version.luceneVersion, tokenStream);
}
},
CLASSIC(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ClassicFilter(tokenStream);
}
},
NGRAM(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new NGramTokenFilter(version.luceneVersion, tokenStream);
}
},
EDGE_NGRAM(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new EdgeNGramTokenFilter(version.luceneVersion, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}
},
UNIQUE(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new UniqueTokenFilter(tokenStream);
}
},
TRUNCATE(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new TruncateTokenFilter(tokenStream, 10);
}
},
// Extended Token Filters
SNOWBALL(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new SnowballFilter(tokenStream, "English");
}
},
STEMMER(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new PorterStemFilter(tokenStream);
}
},
ELISION(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
}
},
ARABIC_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ArabicStemFilter(tokenStream);
}
},
BRAZILIAN_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new BrazilianStemFilter(tokenStream);
}
},
CZECH_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new CzechStemFilter(tokenStream);
}
},
DUTCH_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new DutchStemFilter(tokenStream);
}
},
FRENCH_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new FrenchStemFilter(tokenStream);
}
},
GERMAN_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new GermanStemFilter(tokenStream);
}
},
RUSSIAN_STEM(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new SnowballFilter(tokenStream, "Russian");
}
},
KEYWORD_REPEAT(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new KeywordRepeatFilter(tokenStream);
}
},
ARABIC_NORMALIZATION(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ArabicNormalizationFilter(tokenStream);
}
},
PERSIAN_NORMALIZATION(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new PersianNormalizationFilter(tokenStream);
}
},
TYPE_AS_PAYLOAD(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new TypeAsPayloadTokenFilter(tokenStream);
}
},
SHINGLE(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new ShingleFilter(tokenStream);
}
};
abstract public TokenStream create(TokenStream tokenStream, Version version);
protected final PreBuiltCacheFactory.PreBuiltCache<TokenFilterFactory> cache;
PreBuiltTokenFilters(CachingStrategy cachingStrategy) {
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
}
public synchronized TokenFilterFactory getTokenFilterFactory(final Version version) {
TokenFilterFactory factory = cache.get(version);
if (factory == null) {
final String finalName = name();
factory = new TokenFilterFactory() {
@Override
public String name() {
return finalName.toLowerCase(Locale.ROOT);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return valueOf(finalName).create(tokenStream, version);
}
};
cache.put(version, factory);
}
return factory;
}
}
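getTokenFilterFactory wraps each constant in an anonymous TokenFilterFactory whose name() is the lower-cased constant name and whose create() delegates to the version-aware create() above, caching the wrapper according to the constant's strategy. A brief usage sketch; the class, main method and sample input are illustrative.

package org.elasticsearch.indices.analysis;

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.TokenFilterFactory;

public class TokenFilterFactorySketch {
    public static void main(String[] args) throws Exception {
        // STOP uses CachingStrategy.LUCENE, so repeated lookups for the same
        // Lucene version return the same cached wrapper.
        TokenFilterFactory stop = PreBuiltTokenFilters.STOP.getTokenFilterFactory(Version.CURRENT);
        TokenFilterFactory stopAgain = PreBuiltTokenFilters.STOP.getTokenFilterFactory(Version.CURRENT);
        System.out.println(stop.name());        // "stop"
        System.out.println(stop == stopAgain);  // true

        // The wrapper behaves like any other TokenFilterFactory.
        TokenStream source = new WhitespaceTokenizer(Version.CURRENT.luceneVersion, new StringReader("the quick fox"));
        TokenStream filtered = stop.create(source);
        System.out.println(filtered.getClass().getSimpleName());
    }
}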

PreBuiltTokenizers.java

@@ -0,0 +1,154 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import java.io.Reader;
import java.util.Locale;
/**
*
*/
public enum PreBuiltTokenizers {
STANDARD(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new StandardTokenizer(version.luceneVersion, reader);
}
},
CLASSIC(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new ClassicTokenizer(version.luceneVersion, reader);
}
},
UAX_URL_EMAIL(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new UAX29URLEmailTokenizer(version.luceneVersion, reader);
}
},
PATH_HIERARCHY(CachingStrategy.ONE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new PathHierarchyTokenizer(reader);
}
},
KEYWORD(CachingStrategy.ONE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new KeywordTokenizer(reader);
}
},
LETTER(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new LetterTokenizer(version.luceneVersion, reader);
}
},
LOWERCASE(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new LowerCaseTokenizer(version.luceneVersion, reader);
}
},
WHITESPACE(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new WhitespaceTokenizer(version.luceneVersion, reader);
}
},
NGRAM(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new NGramTokenizer(version.luceneVersion, reader);
}
},
EDGE_NGRAM(CachingStrategy.LUCENE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new EdgeNGramTokenizer(version.luceneVersion, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
}
},
PATTERN(CachingStrategy.ONE) {
@Override
protected Tokenizer create(Reader reader, Version version) {
return new PatternTokenizer(reader, Regex.compile("\\W+", null), -1);
}
};
abstract protected Tokenizer create(Reader reader, Version version);
protected final PreBuiltCacheFactory.PreBuiltCache<TokenizerFactory> cache;
PreBuiltTokenizers(CachingStrategy cachingStrategy) {
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
}
public synchronized TokenizerFactory getTokenizerFactory(final Version version) {
TokenizerFactory tokenizerFactory = cache.get(version);
if (tokenizerFactory == null) {
final String finalName = name();
tokenizerFactory = new TokenizerFactory() {
@Override
public String name() {
return finalName.toLowerCase(Locale.ROOT);
}
@Override
public Tokenizer create(Reader reader) {
return valueOf(finalName).create(reader, version);
}
};
cache.put(version, tokenizerFactory);
}
return tokenizerFactory;
}
}
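PreBuiltTokenizers mirrors the token-filter enum: the wrapper factory's name() is the lower-cased constant name, which is what lets PreBuiltTokenizerFactoryFactory map a requested name back to a constant via valueOf(name.toUpperCase(Locale.ROOT)). A short illustrative sketch, not part of the commit:

package org.elasticsearch.indices.analysis;

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.TokenizerFactory;

public class TokenizerFactorySketch {
    public static void main(String[] args) throws Exception {
        TokenizerFactory uax = PreBuiltTokenizers.UAX_URL_EMAIL.getTokenizerFactory(Version.CURRENT);

        // name() is the lower-cased enum constant, which the factory-factory can
        // round-trip with valueOf(name.toUpperCase(Locale.ROOT)).
        System.out.println(uax.name());   // "uax_url_email"

        // create() delegates to the enum's version-aware create(Reader, Version).
        Tokenizer tokenizer = uax.create(new StringReader("mail me at john@example.org"));
        tokenizer.close();
    }
}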

PreBuiltCharFilterFactoryFactoryTests.java

@@ -0,0 +1,49 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.MatcherAssert.assertThat;
/**
*
*/
public class PreBuiltCharFilterFactoryFactoryTests {
@Test
public void testThatDifferentVersionsCanBeLoaded() {
PreBuiltCharFilterFactoryFactory factory = new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT));
CharFilterFactory emptySettingsTokenizerFactory = factory.create("html_strip", ImmutableSettings.EMPTY);
CharFilterFactory former090TokenizerFactory = factory.create("html_strip", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0).build());
CharFilterFactory former090TokenizerFactoryCopy = factory.create("html_strip", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0).build());
CharFilterFactory currentTokenizerFactory = factory.create("html_strip", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactory));
assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactoryCopy));
}
}

PreBuiltTokenFilterFactoryFactoryTests.java

@@ -0,0 +1,63 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.*;
/**
*
*/
public class PreBuiltTokenFilterFactoryFactoryTests extends ElasticsearchTestCase {
@Test
public void testThatCachingWorksForCachingStrategyOne() {
PreBuiltTokenFilterFactoryFactory factory = new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.WORD_DELIMITER.getTokenFilterFactory(Version.CURRENT));
TokenFilterFactory emptySettingsTokenizerFactory = factory.create("word_delimiter", ImmutableSettings.EMPTY);
TokenFilterFactory former090TokenizerFactory = factory.create("word_delimiter", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_1).build());
TokenFilterFactory former090TokenizerFactoryCopy = factory.create("word_delimiter", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_2).build());
TokenFilterFactory currentTokenizerFactory = factory.create("word_delimiter", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactory));
assertThat(emptySettingsTokenizerFactory, is(former090TokenizerFactoryCopy));
}
@Test
public void testThatDifferentVersionsCanBeLoaded() {
PreBuiltTokenFilterFactoryFactory factory = new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.STOP.getTokenFilterFactory(Version.CURRENT));
TokenFilterFactory emptySettingsTokenizerFactory = factory.create("stop", ImmutableSettings.EMPTY);
TokenFilterFactory former090TokenizerFactory = factory.create("stop", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_1).build());
TokenFilterFactory former090TokenizerFactoryCopy = factory.create("stop", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_2).build());
TokenFilterFactory currentTokenizerFactory = factory.create("stop", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
assertThat(emptySettingsTokenizerFactory, is(not(former090TokenizerFactory)));
assertThat(former090TokenizerFactory, is(former090TokenizerFactoryCopy));
}
}

PreBuiltTokenizerFactoryFactoryTests.java

@@ -0,0 +1,51 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.*;
/**
*
*/
public class PreBuiltTokenizerFactoryFactoryTests extends ElasticsearchTestCase {
@Test
public void testThatDifferentVersionsCanBeLoaded() {
PreBuiltTokenizerFactoryFactory factory = new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.STANDARD.getTokenizerFactory(Version.CURRENT));
TokenizerFactory emptySettingsTokenizerFactory = factory.create("standard", ImmutableSettings.EMPTY);
// different es versions, same lucene version, thus cached
TokenizerFactory former090TokenizerFactory = factory.create("standard", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_1).build());
TokenizerFactory former090TokenizerFactoryCopy = factory.create("standard", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_2).build());
TokenizerFactory currentTokenizerFactory = factory.create("standard", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
assertThat(emptySettingsTokenizerFactory, is(currentTokenizerFactory));
assertThat(emptySettingsTokenizerFactory, is(not(former090TokenizerFactory)));
assertThat(emptySettingsTokenizerFactory, is(not(former090TokenizerFactoryCopy)));
assertThat(former090TokenizerFactory, is(former090TokenizerFactoryCopy));
}
}

PreBuiltAnalyzerIntegrationTests.java

@@ -16,7 +16,7 @@
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.indices.analysis;   // was: package org.elasticsearch.index.analysis;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -26,7 +26,6 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
@@ -36,7 +35,8 @@ import java.util.Locale;
import java.util.Map;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.is;             // was: import static org.hamcrest.Matchers.*;
import static org.hamcrest.Matchers.notNullValue;

/**
 *
@@ -105,36 +105,35 @@ public class PreBuiltAnalyzerIntegrationTests extends ElasticsearchIntegrationTest {
        assertThatAnalyzersHaveBeenLoaded(loadedAnalyzers);

        // check that all of the prebuiltanalyzers are still open
        assertLuceneAnalyzersAreNotClosed(loadedAnalyzers);
    }

    private void assertThatAnalyzersHaveBeenLoaded(Map<PreBuiltAnalyzers, List<Version>> expectedLoadedAnalyzers) {
        for (Map.Entry<PreBuiltAnalyzers, List<Version>> entry : expectedLoadedAnalyzers.entrySet()) {
            for (Version version : entry.getValue()) {
                // if it is not null in the cache, it has been loaded
                assertThat(entry.getKey().getCache().get(version), is(notNullValue()));
            }
        }
    }

    // the close() method of a lucene analyzer sets the storedValue field to null
    // we simply check this via reflection - ugly but works
    private void assertLuceneAnalyzersAreNotClosed(Map<PreBuiltAnalyzers, List<Version>> loadedAnalyzers) throws IllegalAccessException, NoSuchFieldException {
        for (Map.Entry<PreBuiltAnalyzers, List<Version>> preBuiltAnalyzerEntry : loadedAnalyzers.entrySet()) {
            PreBuiltAnalyzers preBuiltAnalyzer = preBuiltAnalyzerEntry.getKey();
            for (Version version : preBuiltAnalyzerEntry.getValue()) {
                Analyzer analyzer = preBuiltAnalyzerEntry.getKey().getCache().get(version);

                Field field = getFieldFromClass("storedValue", analyzer);
                boolean currentAccessible = field.isAccessible();
                field.setAccessible(true);
                Object storedValue = field.get(analyzer);
                field.setAccessible(currentAccessible);

                assertThat(String.format(Locale.ROOT, "Analyzer %s in version %s seems to be closed", preBuiltAnalyzer.name(), version), storedValue, is(notNullValue()));
            }
        }
    }

    // Removed in this commit: the previous per-analyzer version of this helper,
    // which iterated the enum's cached analyzers directly:
    //
    //   private void assertLuceneAnalyzerIsNotClosed(PreBuiltAnalyzers preBuiltAnalyzer) throws IllegalAccessException, NoSuchFieldException {
    //       for (Map.Entry<Version, Analyzer> luceneAnalyzerEntry : preBuiltAnalyzer.getCachedAnalyzers().entrySet()) {
    //           Field field = getFieldFromClass("storedValue", luceneAnalyzerEntry.getValue());
    //           boolean currentAccessible = field.isAccessible();
    //           field.setAccessible(true);
    //           Object storedValue = field.get(preBuiltAnalyzer.getAnalyzer(luceneAnalyzerEntry.getKey()));
    //           field.setAccessible(currentAccessible);
    //           assertThat(String.format(Locale.ROOT, "Analyzer %s in version %s seems to be closed", preBuiltAnalyzer.name(), luceneAnalyzerEntry.getKey()), storedValue, is(notNullValue()));
    //       }
    //   }

    /**