Make PreBuiltAnalyzerProviderFactory pluggable via AnalysisPlugin and

move `fingerprint`, `pattern` and `standard_html_strip` analyzers
to analysis-common module. (both AnalysisProvider and PreBuiltAnalyzerProvider)

Changed PreBuiltAnalyzerProviderFactory to extend from PreConfiguredAnalysisComponent and
changed to make sure that predefined analyzers are always instantiated with the current
ES version and if an instance is requested for a different version then delegate to PreBuiltCache.
This is similar to the behaviour that exists today in AnalysisRegistry.PreBuiltAnalysis and
PreBuiltAnalyzerProviderFactory. (#31095)

Relates to #23658
This commit is contained in:
Martijn van Groningen 2018-06-06 07:40:21 +02:00 committed by GitHub
parent 805648848d
commit 735d0e671a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 267 additions and 92 deletions

View File

@ -19,6 +19,7 @@
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
@ -79,7 +80,9 @@ import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@ -87,6 +90,7 @@ import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.tartarus.snowball.ext.DutchStemmer;
@ -103,6 +107,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));
/**
 * Registers the configurable analyzer providers shipped by this plugin, keyed by analyzer type name.
 */
@Override
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> providersByType = new TreeMap<>();
    // A TreeMap orders its keys itself, so the registration order below carries no meaning.
    providersByType.put("pattern", PatternAnalyzerProvider::new);
    providersByType.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
    providersByType.put("fingerprint", FingerprintAnalyzerProvider::new);
    return providersByType;
}
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
@ -197,6 +210,16 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
return tokenizers;
}
/**
 * Exposes the pre-built analyzers ({@code standard_html_strip} and {@code pattern}) provided by this plugin.
 *
 * @return a new mutable list with one factory per pre-built analyzer
 */
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
    List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
    analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> {
        Analyzer analyzer = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
        // Stamp the instance with the Lucene version it was requested for; without this the
        // per-Lucene-version cache would hold instances that are indistinguishable from each other.
        analyzer.setVersion(version.luceneVersion);
        return analyzer;
    }));
    analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version ->
        new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)));
    return analyzers;
}
@Override
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
List<PreConfiguredCharFilter> filters = new ArrayList<>();

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@ -35,7 +35,7 @@ public final class FingerprintAnalyzer extends Analyzer {
private final int maxOutputSize;
private final CharArraySet stopWords;
public FingerprintAnalyzer(CharArraySet stopWords, char separator, int maxOutputSize) {
FingerprintAnalyzer(CharArraySet stopWords, char separator, int maxOutputSize) {
this.separator = separator;
this.maxOutputSize = maxOutputSize;
this.stopWords = stopWords;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@ -25,6 +25,8 @@ import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
/**
@ -42,7 +44,7 @@ public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider<A
private final FingerprintAnalyzer analyzer;
public FingerprintAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
FingerprintAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
char separator = parseSeparator(settings);

View File

@ -21,15 +21,13 @@ package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import static org.elasticsearch.index.analysis.FingerprintAnalyzerProvider.DEFAULT_MAX_OUTPUT_SIZE;
import static org.elasticsearch.index.analysis.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;
import static org.elasticsearch.analysis.common.FingerprintAnalyzerProvider.DEFAULT_MAX_OUTPUT_SIZE;
import static org.elasticsearch.analysis.common.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;
public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@ -35,7 +35,7 @@ public final class PatternAnalyzer extends Analyzer {
private final boolean lowercase;
private final CharArraySet stopWords;
public PatternAnalyzer(Pattern pattern, boolean lowercase, CharArraySet stopWords) {
PatternAnalyzer(Pattern pattern, boolean lowercase, CharArraySet stopWords) {
this.pattern = pattern;
this.lowercase = lowercase;
this.stopWords = stopWords;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@ -25,6 +25,8 @@ import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
import java.util.regex.Pattern;
@ -32,7 +34,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analy
private final PatternAnalyzer analyzer;
public PatternAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
PatternAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@ -39,7 +39,7 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
public StandardHtmlStripAnalyzer(CharArraySet stopwords) {
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
super(stopwords);
}

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.StopAnalyzer;
@ -25,12 +25,14 @@ import org.elasticsearch.Version;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
private final StandardHtmlStripAnalyzer analyzer;
public StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);

View File

@ -1,4 +1,4 @@
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
/*
* Licensed to Elasticsearch under one or more contributor

View File

@ -1,4 +1,4 @@
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
/*
* Licensed to Elasticsearch under one or more contributor

View File

@ -37,3 +37,35 @@
analyzer: bengali
- length: { tokens: 1 }
- match: { tokens.0.token: বার }
---
# Runs the built-in fingerprint analyzer through the analyze API: the five input
# terms are expected to come back as the single token "a1 b2 c3 d4".
"fingerprint":
- do:
indices.analyze:
body:
text: A1 B2 A1 D4 C3
analyzer: fingerprint
- length: { tokens: 1 }
- match: { tokens.0.token: a1 b2 c3 d4 }
---
# Runs the built-in standard_html_strip analyzer: the two tag-like inputs are
# expected to produce the tokens "bold" and "italic".
"standard_html_strip":
- do:
indices.analyze:
body:
text: <bold/> <italic/>
analyzer: standard_html_strip
- length: { tokens: 2 }
- match: { tokens.0.token: bold }
- match: { tokens.1.token: italic }
---
# Runs the built-in pattern analyzer with its default settings: "foo bar" is
# expected to split into the tokens "foo" and "bar".
"pattern":
- do:
indices.analyze:
body:
text: foo bar
analyzer: pattern
- length: { tokens: 2 }
- match: { tokens.0.token: foo }
- match: { tokens.1.token: bar }

View File

@ -22,7 +22,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
@ -70,14 +69,16 @@ public final class AnalysisRegistry implements Closeable {
Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
this.environment = environment;
this.charFilters = unmodifiableMap(charFilters);
this.tokenFilters = unmodifiableMap(tokenFilters);
this.tokenizers = unmodifiableMap(tokenizers);
this.analyzers = unmodifiableMap(analyzers);
this.normalizers = unmodifiableMap(normalizers);
prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
prebuiltAnalysis =
new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
/**
@ -398,13 +399,15 @@ public final class AnalysisRegistry implements Closeable {
private PrebuiltAnalysis(
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
// Analyzers
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
analyzerProviderFactories.putAll(preConfiguredAnalyzers);
// Pre-build analyzers
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, preBuiltAnalyzerEnum));
}
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
@ -429,17 +432,10 @@ public final class AnalysisRegistry implements Closeable {
return analyzerProviderFactories.get(name);
}
Analyzer analyzer(String name) {
PreBuiltAnalyzerProviderFactory analyzerProviderFactory = (PreBuiltAnalyzerProviderFactory) analyzerProviderFactories.get(name);
if (analyzerProviderFactory == null) {
return null;
}
return analyzerProviderFactory.analyzer();
}
@Override
public void close() throws IOException {
IOUtils.close(analyzerProviderFactories.values().stream().map((a) -> ((PreBuiltAnalyzerProviderFactory)a).analyzer()).collect(Collectors.toList()));
IOUtils.close(analyzerProviderFactories.values().stream()
.map((a) -> ((PreBuiltAnalyzerProviderFactory)a)).collect(Collectors.toList()));
}
}

View File

@ -22,41 +22,101 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.Version;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
public class PreBuiltAnalyzerProviderFactory implements AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> {
public class PreBuiltAnalyzerProviderFactory extends PreConfiguredAnalysisComponent<AnalyzerProvider<?>> implements Closeable {
private final PreBuiltAnalyzerProvider analyzerProvider;
private final Function<Version, Analyzer> create;
private final PreBuiltAnalyzerProvider current;
public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer);
/**
* This constructor only exists to expose analyzers defined in {@link PreBuiltAnalyzers} as {@link PreBuiltAnalyzerProviderFactory}.
*/
PreBuiltAnalyzerProviderFactory(String name, PreBuiltAnalyzers preBuiltAnalyzer) {
super(name, new PreBuiltAnalyzersDelegateCache(name, preBuiltAnalyzer));
this.create = preBuiltAnalyzer::getAnalyzer;
current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, preBuiltAnalyzer.getAnalyzer(Version.CURRENT));
}
public AnalyzerProvider<?> create(String name, Settings settings) {
Version indexVersion = Version.indexCreated(settings);
if (!Version.CURRENT.equals(indexVersion)) {
PreBuiltAnalyzers preBuiltAnalyzers = PreBuiltAnalyzers.getOrDefault(name, null);
if (preBuiltAnalyzers != null) {
Analyzer analyzer = preBuiltAnalyzers.getAnalyzer(indexVersion);
return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
}
}
return analyzerProvider;
public PreBuiltAnalyzerProviderFactory(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Analyzer> create) {
super(name, cache);
this.create = create;
this.current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, create.apply(Version.CURRENT));
}
@Override
public AnalyzerProvider<?> get(IndexSettings indexSettings, Environment environment, String name, Settings settings)
throws IOException {
return create(name, settings);
public AnalyzerProvider<?> get(IndexSettings indexSettings,
                               Environment environment,
                               String name,
                               Settings settings) throws IOException {
    // Indices created on the current version share the eagerly built provider;
    // every other version is handled by the parent implementation.
    final boolean createdOnCurrentVersion = Version.CURRENT.equals(Version.indexCreated(settings));
    if (createdOnCurrentVersion) {
        return current;
    }
    return super.get(indexSettings, environment, name, settings);
}
public Analyzer analyzer() {
return analyzerProvider.get();
/**
 * Builds a provider for an index version other than {@link Version#CURRENT}; the current version is
 * expected to be served by the pre-built {@code current} provider instead (enforced by assertion only).
 */
@Override
protected AnalyzerProvider<?> create(Version version) {
    assert Version.CURRENT.equals(version) == false;
    final String providerName = getName();
    final Analyzer versionedAnalyzer = create.apply(version);
    return new PreBuiltAnalyzerProvider(providerName, AnalyzerScope.INDICES, versionedAnalyzer);
}
/**
 * Closes the analyzer behind every cached provider as well as the analyzer of the current-version provider.
 */
@Override
public void close() throws IOException {
    List<Closeable> analyzersToClose = cache.values().stream()
        .map(provider -> (Closeable) provider.get())
        .collect(Collectors.toList());
    // The current-version provider is held outside the cache, so add its analyzer explicitly.
    analyzersToClose.add(current.get());
    IOUtils.close(analyzersToClose);
}
/**
 * A special cache that closes the gap between PreBuiltAnalyzers and PreBuiltAnalyzerProviderFactory.
 *
 * Nothing is stored here: lookups are answered by the wrapped {@link PreBuiltAnalyzers} entry.
 *
 * This can be removed when all analyzers have been moved away from PreBuiltAnalyzers to
 * PreBuiltAnalyzerProviderFactory either in server or analysis-common.
 */
static class PreBuiltAnalyzersDelegateCache implements PreBuiltCacheFactory.PreBuiltCache<AnalyzerProvider<?>> {

    private final String name;
    private final PreBuiltAnalyzers preBuiltAnalyzer;

    private PreBuiltAnalyzersDelegateCache(String name, PreBuiltAnalyzers preBuiltAnalyzer) {
        this.name = name;
        this.preBuiltAnalyzer = preBuiltAnalyzer;
    }

    /** Returns a new provider around the analyzer that {@link PreBuiltAnalyzers} hands out for the version. */
    @Override
    public AnalyzerProvider<?> get(Version version) {
        return wrap(preBuiltAnalyzer.getAnalyzer(version));
    }

    @Override
    public void put(Version version, AnalyzerProvider<?> analyzerProvider) {
        // No need to put, because we delegate in get() directly to PreBuiltAnalyzers which already caches.
    }

    @Override
    public Collection<AnalyzerProvider<?>> values() {
        // Wrap each cached analyzer in a PreBuiltAnalyzerProvider, this is what PreBuiltAnalyzerProviderFactory#close
        // expects (other caches are not directly caching analyzers, but analyzer providers instead).
        return preBuiltAnalyzer.getCache().values().stream()
            .map(this::wrap)
            .collect(Collectors.toList());
    }

    /** Wraps an analyzer in an indices-scoped provider carrying this cache's name. */
    private AnalyzerProvider<?> wrap(Analyzer analyzer) {
        return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
    }
}
}

View File

@ -33,13 +33,18 @@ import java.io.IOException;
*/
public abstract class PreConfiguredAnalysisComponent<T> implements AnalysisModule.AnalysisProvider<T> {
private final String name;
private final PreBuiltCacheFactory.PreBuiltCache<T> cache;
protected final PreBuiltCacheFactory.PreBuiltCache<T> cache;
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.CachingStrategy cache) {
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.CachingStrategy cache) {
this.name = name;
this.cache = PreBuiltCacheFactory.getCache(cache);
}
/**
 * Creates a component that uses the supplied cache instance directly instead of obtaining one
 * from a caching strategy.
 */
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.PreBuiltCache<T> cache) {
    this.cache = cache;
    this.name = name;
}
@Override
public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
Version versionCreated = Version.indexCreated(settings);

View File

@ -43,7 +43,6 @@ import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@ -59,9 +58,9 @@ import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@ -73,7 +72,6 @@ import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StopAnalyzerProvider;
@ -122,11 +120,12 @@ public final class AnalysisModule {
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);
analysisRegistry = new AnalysisRegistry(environment,
charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(),
analyzers.getRegistry(), normalizers.getRegistry(),
preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
HunspellService getHunspellService() {
@ -162,6 +161,16 @@ public final class AnalysisModule {
return tokenFilters;
}
/**
 * Gathers the pre-built analyzer factories contributed by the given plugins into a read-only map
 * keyed by factory name.
 */
static Map<String, PreBuiltAnalyzerProviderFactory> setupPreBuiltAnalyzerProviderFactories(List<AnalysisPlugin> plugins) {
    final NamedRegistry<PreBuiltAnalyzerProviderFactory> preBuiltAnalyzers = new NamedRegistry<>("pre-built analyzer");
    for (AnalysisPlugin plugin : plugins) {
        plugin.getPreBuiltAnalyzerProviderFactories()
            .forEach(factory -> preBuiltAnalyzers.register(factory.getName(), factory));
    }
    return unmodifiableMap(preBuiltAnalyzers.getRegistry());
}
static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
NamedRegistry<PreConfiguredCharFilter> preConfiguredCharFilters = new NamedRegistry<>("pre-configured char_filter");
@ -232,12 +241,10 @@ public final class AnalysisModule {
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = new NamedRegistry<>("analyzer");
analyzers.register("default", StandardAnalyzerProvider::new);
analyzers.register("standard", StandardAnalyzerProvider::new);
analyzers.register("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.register("simple", SimpleAnalyzerProvider::new);
analyzers.register("stop", StopAnalyzerProvider::new);
analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
analyzers.register("keyword", KeywordAnalyzerProvider::new);
analyzers.register("pattern", PatternAnalyzerProvider::new);
analyzers.register("snowball", SnowballAnalyzerProvider::new);
analyzers.register("arabic", ArabicAnalyzerProvider::new);
analyzers.register("armenian", ArmenianAnalyzerProvider::new);
@ -274,7 +281,6 @@ public final class AnalysisModule {
analyzers.register("swedish", SwedishAnalyzerProvider::new);
analyzers.register("turkish", TurkishAnalyzerProvider::new);
analyzers.register("thai", ThaiAnalyzerProvider::new);
analyzers.register("fingerprint", FingerprintAnalyzerProvider::new);
analyzers.extractAndRegister(plugins, AnalysisPlugin::getAnalyzers);
return analyzers;
}

View File

@ -61,10 +61,7 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.PatternAnalyzer;
import org.elasticsearch.index.analysis.SnowballAnalyzer;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import java.util.Locale;
@ -141,22 +138,6 @@ public enum PreBuiltAnalyzers {
}
},
PATTERN(CachingStrategy.ELASTICSEARCH) {
@Override
protected Analyzer create(Version version) {
return new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET);
}
},
STANDARD_HTML_STRIP(CachingStrategy.ELASTICSEARCH) {
@Override
protected Analyzer create(Version version) {
final Analyzer analyzer = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
analyzer.setVersion(version.luceneVersion);
return analyzer;
}
},
ARABIC {
@Override
protected Analyzer create(Version version) {
@ -484,7 +465,7 @@ public enum PreBuiltAnalyzers {
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
}
PreBuiltCacheFactory.PreBuiltCache<Analyzer> getCache() {
public PreBuiltCacheFactory.PreBuiltCache<Analyzer> getCache() {
return cache;
}

View File

@ -21,6 +21,8 @@ package org.elasticsearch.indices.analysis;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@ -36,8 +38,12 @@ public class PreBuiltCacheFactory {
public enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
/**
* Store for pre-built analysis components, addressed by Elasticsearch {@link Version}.
*/
public interface PreBuiltCache<T> {
/** Returns the component this cache associates with the given version. */
T get(Version version);
/** Hands the component for the given version to the cache. */
void put(Version version, T t);
/** Returns the components this cache currently exposes. */
Collection<T> values();
}
private PreBuiltCacheFactory() {}
@ -71,6 +77,11 @@ public class PreBuiltCacheFactory {
public void put(Version version, T model) {
this.model = model;
}
/**
 * Returns the single cached model, or an empty collection when nothing has been cached yet.
 * Callers such as PreBuiltAnalyzerProviderFactory#close dereference every element, so a
 * singleton wrapping {@code null} must never be handed out.
 */
@Override
public Collection<T> values() {
    return model == null ? Collections.emptySet() : Collections.singleton(model);
}
}
/**
@ -89,6 +100,11 @@ public class PreBuiltCacheFactory {
public void put(Version version, T model) {
mapModel.put(version, model);
}
@Override
public Collection<T> values() {
// Returns whatever mapModel.values() yields; entries are added in put() keyed by the Elasticsearch version.
// NOTE(review): presumably a live view of the backing map rather than a copy - confirm against mapModel's declaration.
return mapModel.values();
}
}
/**
@ -107,5 +123,10 @@ public class PreBuiltCacheFactory {
public void put(org.elasticsearch.Version version, T model) {
mapModel.put(version.luceneVersion, model);
}
@Override
public Collection<T> values() {
// Returns whatever mapModel.values() yields; entries are added in put() keyed by version.luceneVersion.
// NOTE(review): presumably a live view of the backing map rather than a copy - confirm against mapModel's declaration.
return mapModel.values();
}
}
}

View File

@ -28,6 +28,7 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@ -92,6 +93,13 @@ public interface AnalysisPlugin {
return emptyMap();
}
/**
* Override to add additional pre-configured {@link Analyzer}s.
*
* @return the pre-built analyzer factories contributed by this plugin; the default implementation contributes none
*/
default List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
return emptyList();
}
/**
* Override to add additional pre-configured {@link CharFilter}s.
*/

View File

@ -121,7 +121,7 @@ public class IndexModuleTests extends ESTestCase {
index = indexSettings.getIndex();
environment = TestEnvironment.newEnvironment(settings);
emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
emptyMap(), emptyMap(), emptyMap());
emptyMap(), emptyMap(), emptyMap(), emptyMap());
threadPool = new TestThreadPool("test");
circuitBreakerService = new NoneCircuitBreakerService();
PageCacheRecycler pageCacheRecycler = new PageCacheRecycler(settings);

View File

@ -41,6 +41,7 @@ import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import static java.util.Collections.emptyMap;
@ -48,6 +49,8 @@ import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
public class AnalysisRegistryTests extends ESTestCase {
private AnalysisRegistry emptyRegistry;
@ -58,7 +61,7 @@ public class AnalysisRegistryTests extends ESTestCase {
private static AnalysisRegistry emptyAnalysisRegistry(Settings settings) {
return new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
emptyMap(), emptyMap(), emptyMap());
emptyMap(), emptyMap(), emptyMap(), emptyMap());
}
private static IndexSettings indexSettingsOfCurrentVersion(Settings.Builder settings) {
@ -224,4 +227,16 @@ public class AnalysisRegistryTests extends ESTestCase {
indexAnalyzers.close();
indexAnalyzers.close();
}
/**
 * Closing the registry must call {@code close()} on the pre-built analyzer factories it was constructed with.
 */
public void testEnsureCloseInvocationProperlyDelegated() throws IOException {
    final Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    final PreBuiltAnalyzerProviderFactory factory = mock(PreBuiltAnalyzerProviderFactory.class);
    final Map<String, PreBuiltAnalyzerProviderFactory> preBuiltAnalyzers = Collections.singletonMap("key", factory);

    final AnalysisRegistry registry = new AnalysisRegistry(TestEnvironment.newEnvironment(nodeSettings),
        emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
        emptyMap(), emptyMap(), emptyMap(), preBuiltAnalyzers);
    registry.close();

    verify(factory).close();
}
}

View File

@ -19,7 +19,9 @@
package org.elasticsearch.search.fetch.subphase.highlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
@ -32,6 +34,8 @@ import org.elasticsearch.common.settings.Settings.Builder;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
@ -41,6 +45,8 @@ import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.RandomScoreFunctionBuilder;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
@ -63,6 +69,7 @@ import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import static java.util.Collections.singletonMap;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
@ -106,7 +113,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class);
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class);
}
public void testHighlightingWithStoredKeyword() throws IOException {
@ -1599,8 +1606,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put(indexSettings())
.put("analysis.analyzer.my_analyzer.type", "pattern")
.put("analysis.analyzer.my_analyzer.pattern", "\\s+")
.put("analysis.analyzer.my_analyzer.type", "mock_whitespace")
.build())
.addMapping("type", "text", "type=text,analyzer=my_analyzer"));
ensureGreen();
@ -1611,7 +1617,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
SearchResponse response = client().prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("text", "test"))
.highlighter(new HighlightBuilder().field("text")).execute().actionGet();
// PatternAnalyzer will throw an exception if it is resetted twice
// Mock tokenizer will throw an exception if it is reset twice
assertHitCount(response, 1L);
}
@ -2976,4 +2982,22 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
}
}
/**
 * Test plugin that registers a {@code mock_whitespace} analyzer type backed by a {@link MockAnalyzer}
 * using the {@link MockTokenizer#WHITESPACE} automaton without lowercasing.
 */
public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin {

    @Override
    public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
        return singletonMap("mock_whitespace", (indexSettings, environment, name, settings) ->
            new AbstractIndexAnalyzerProvider<Analyzer>(indexSettings, name, settings) {

                // One analyzer per provider, created together with the provider.
                private final Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

                @Override
                public Analyzer get() {
                    return whitespaceAnalyzer;
                }
            });
    }
}
}

View File

@ -67,7 +67,7 @@ public class WatcherPluginTests extends ESTestCase {
// ensure index module is not called, even if watches index is tried
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(Watch.INDEX, settings);
AnalysisRegistry registry = new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(), emptyMap(),
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
IndexModule indexModule = new IndexModule(indexSettings, registry);
// this will trip an assertion if the watcher indexing operation listener is null (which it is) but we try to add it
watcher.onIndexModule(indexModule);