Make PreBuiltAnalyzerProviderFactory pluggable via AnalysisPlugin and
move the `fingerprint`, `pattern` and `standard_html_strip` analyzers (both AnalysisProvider and PreBuiltAnalyzerProvider) to the analysis-common module. Changed PreBuiltAnalyzerProviderFactory to extend PreConfiguredAnalysisComponent and made sure that predefined analyzers are always instantiated with the current ES version; if an instance is requested for a different version, the request is delegated to PreBuiltCache. This is similar to the behaviour that exists today in AnalysisRegistry.PrebuiltAnalysis and PreBuiltAnalyzerProviderFactory. (#31095) Relates to #23658
This commit is contained in:
parent
805648848d
commit
735d0e671a
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.analysis.common;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
|
@ -79,7 +80,9 @@ import org.apache.lucene.analysis.util.ElisionFilter;
|
||||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||||
import org.elasticsearch.common.logging.Loggers;
|
import org.elasticsearch.common.logging.Loggers;
|
||||||
import org.elasticsearch.common.regex.Regex;
|
import org.elasticsearch.common.regex.Regex;
|
||||||
|
import org.elasticsearch.index.analysis.AnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||||
|
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||||
|
@ -87,6 +90,7 @@ import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.TokenizerFactory;
|
import org.elasticsearch.index.analysis.TokenizerFactory;
|
||||||
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
|
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
|
||||||
|
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
|
||||||
import org.elasticsearch.plugins.AnalysisPlugin;
|
import org.elasticsearch.plugins.AnalysisPlugin;
|
||||||
import org.elasticsearch.plugins.Plugin;
|
import org.elasticsearch.plugins.Plugin;
|
||||||
import org.tartarus.snowball.ext.DutchStemmer;
|
import org.tartarus.snowball.ext.DutchStemmer;
|
||||||
|
@ -103,6 +107,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
|
||||||
|
|
||||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));
|
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
|
||||||
|
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
|
||||||
|
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
|
||||||
|
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
|
||||||
|
analyzers.put("pattern", PatternAnalyzerProvider::new);
|
||||||
|
return analyzers;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
|
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
|
||||||
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
|
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
|
||||||
|
@ -197,6 +210,16 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
|
||||||
return tokenizers;
|
return tokenizers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
|
||||||
|
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
|
||||||
|
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE,
|
||||||
|
version -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
|
||||||
|
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version ->
|
||||||
|
new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)));
|
||||||
|
return analyzers;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
|
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
|
||||||
List<PreConfiguredCharFilter> filters = new ArrayList<>();
|
List<PreConfiguredCharFilter> filters = new ArrayList<>();
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
@ -35,7 +35,7 @@ public final class FingerprintAnalyzer extends Analyzer {
|
||||||
private final int maxOutputSize;
|
private final int maxOutputSize;
|
||||||
private final CharArraySet stopWords;
|
private final CharArraySet stopWords;
|
||||||
|
|
||||||
public FingerprintAnalyzer(CharArraySet stopWords, char separator, int maxOutputSize) {
|
FingerprintAnalyzer(CharArraySet stopWords, char separator, int maxOutputSize) {
|
||||||
this.separator = separator;
|
this.separator = separator;
|
||||||
this.maxOutputSize = maxOutputSize;
|
this.maxOutputSize = maxOutputSize;
|
||||||
this.stopWords = stopWords;
|
this.stopWords = stopWords;
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
@ -25,6 +25,8 @@ import org.elasticsearch.common.ParseField;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
|
||||||
|
import org.elasticsearch.index.analysis.Analysis;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -42,7 +44,7 @@ public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider<A
|
||||||
|
|
||||||
private final FingerprintAnalyzer analyzer;
|
private final FingerprintAnalyzer analyzer;
|
||||||
|
|
||||||
public FingerprintAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
FingerprintAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
|
|
||||||
char separator = parseSeparator(settings);
|
char separator = parseSeparator(settings);
|
|
@ -21,15 +21,13 @@ package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
|
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
|
||||||
import org.elasticsearch.common.ParseField;
|
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
|
|
||||||
|
|
||||||
import static org.elasticsearch.index.analysis.FingerprintAnalyzerProvider.DEFAULT_MAX_OUTPUT_SIZE;
|
import static org.elasticsearch.analysis.common.FingerprintAnalyzerProvider.DEFAULT_MAX_OUTPUT_SIZE;
|
||||||
import static org.elasticsearch.index.analysis.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;
|
import static org.elasticsearch.analysis.common.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;
|
||||||
|
|
||||||
public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
@ -35,7 +35,7 @@ public final class PatternAnalyzer extends Analyzer {
|
||||||
private final boolean lowercase;
|
private final boolean lowercase;
|
||||||
private final CharArraySet stopWords;
|
private final CharArraySet stopWords;
|
||||||
|
|
||||||
public PatternAnalyzer(Pattern pattern, boolean lowercase, CharArraySet stopWords) {
|
PatternAnalyzer(Pattern pattern, boolean lowercase, CharArraySet stopWords) {
|
||||||
this.pattern = pattern;
|
this.pattern = pattern;
|
||||||
this.lowercase = lowercase;
|
this.lowercase = lowercase;
|
||||||
this.stopWords = stopWords;
|
this.stopWords = stopWords;
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
@ -25,6 +25,8 @@ import org.elasticsearch.common.regex.Regex;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
|
||||||
|
import org.elasticsearch.index.analysis.Analysis;
|
||||||
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@ -32,7 +34,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analy
|
||||||
|
|
||||||
private final PatternAnalyzer analyzer;
|
private final PatternAnalyzer analyzer;
|
||||||
|
|
||||||
public PatternAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
PatternAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
|
|
||||||
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
|
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
|
@ -39,7 +39,7 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
|
||||||
super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||||
}
|
}
|
||||||
|
|
||||||
public StandardHtmlStripAnalyzer(CharArraySet stopwords) {
|
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
|
||||||
super(stopwords);
|
super(stopwords);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
|
@ -25,12 +25,14 @@ import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
|
||||||
|
import org.elasticsearch.index.analysis.Analysis;
|
||||||
|
|
||||||
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
|
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
|
||||||
|
|
||||||
private final StandardHtmlStripAnalyzer analyzer;
|
private final StandardHtmlStripAnalyzer analyzer;
|
||||||
|
|
||||||
public StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
|
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
|
||||||
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
|
|
@ -1,4 +1,4 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to Elasticsearch under one or more contributor
|
* Licensed to Elasticsearch under one or more contributor
|
|
@ -1,4 +1,4 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to Elasticsearch under one or more contributor
|
* Licensed to Elasticsearch under one or more contributor
|
|
@ -37,3 +37,35 @@
|
||||||
analyzer: bengali
|
analyzer: bengali
|
||||||
- length: { tokens: 1 }
|
- length: { tokens: 1 }
|
||||||
- match: { tokens.0.token: বার }
|
- match: { tokens.0.token: বার }
|
||||||
|
|
||||||
|
---
|
||||||
|
"fingerprint":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: A1 B2 A1 D4 C3
|
||||||
|
analyzer: fingerprint
|
||||||
|
- length: { tokens: 1 }
|
||||||
|
- match: { tokens.0.token: a1 b2 c3 d4 }
|
||||||
|
|
||||||
|
---
|
||||||
|
"standard_html_strip":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: <bold/> <italic/>
|
||||||
|
analyzer: standard_html_strip
|
||||||
|
- length: { tokens: 2 }
|
||||||
|
- match: { tokens.0.token: bold }
|
||||||
|
- match: { tokens.1.token: italic }
|
||||||
|
|
||||||
|
---
|
||||||
|
"pattern":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: foo bar
|
||||||
|
analyzer: pattern
|
||||||
|
- length: { tokens: 2 }
|
||||||
|
- match: { tokens.0.token: foo }
|
||||||
|
- match: { tokens.1.token: bar }
|
||||||
|
|
|
@ -22,7 +22,6 @@ import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.elasticsearch.core.internal.io.IOUtils;
|
import org.elasticsearch.core.internal.io.IOUtils;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.Version;
|
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
|
@ -70,14 +69,16 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
|
Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
|
||||||
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
|
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
|
||||||
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
|
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
|
||||||
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
|
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
|
||||||
|
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
|
||||||
this.environment = environment;
|
this.environment = environment;
|
||||||
this.charFilters = unmodifiableMap(charFilters);
|
this.charFilters = unmodifiableMap(charFilters);
|
||||||
this.tokenFilters = unmodifiableMap(tokenFilters);
|
this.tokenFilters = unmodifiableMap(tokenFilters);
|
||||||
this.tokenizers = unmodifiableMap(tokenizers);
|
this.tokenizers = unmodifiableMap(tokenizers);
|
||||||
this.analyzers = unmodifiableMap(analyzers);
|
this.analyzers = unmodifiableMap(analyzers);
|
||||||
this.normalizers = unmodifiableMap(normalizers);
|
this.normalizers = unmodifiableMap(normalizers);
|
||||||
prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
|
prebuiltAnalysis =
|
||||||
|
new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -398,13 +399,15 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
private PrebuiltAnalysis(
|
private PrebuiltAnalysis(
|
||||||
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
|
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
|
||||||
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
|
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
|
||||||
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
|
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
|
||||||
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
|
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
|
||||||
|
|
||||||
// Analyzers
|
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
|
||||||
|
analyzerProviderFactories.putAll(preConfiguredAnalyzers);
|
||||||
|
// Pre-built analyzers
|
||||||
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
|
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
|
||||||
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
|
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
|
||||||
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
|
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, preBuiltAnalyzerEnum));
|
||||||
}
|
}
|
||||||
|
|
||||||
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
|
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
|
||||||
|
@ -429,17 +432,10 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
return analyzerProviderFactories.get(name);
|
return analyzerProviderFactories.get(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
Analyzer analyzer(String name) {
|
|
||||||
PreBuiltAnalyzerProviderFactory analyzerProviderFactory = (PreBuiltAnalyzerProviderFactory) analyzerProviderFactories.get(name);
|
|
||||||
if (analyzerProviderFactory == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return analyzerProviderFactory.analyzer();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
IOUtils.close(analyzerProviderFactories.values().stream().map((a) -> ((PreBuiltAnalyzerProviderFactory)a).analyzer()).collect(Collectors.toList()));
|
IOUtils.close(analyzerProviderFactories.values().stream()
|
||||||
|
.map((a) -> ((PreBuiltAnalyzerProviderFactory)a)).collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,41 +22,101 @@ package org.elasticsearch.index.analysis;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.core.internal.io.IOUtils;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
import org.elasticsearch.indices.analysis.AnalysisModule;
|
|
||||||
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
|
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
|
||||||
|
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class PreBuiltAnalyzerProviderFactory implements AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> {
|
public class PreBuiltAnalyzerProviderFactory extends PreConfiguredAnalysisComponent<AnalyzerProvider<?>> implements Closeable {
|
||||||
|
|
||||||
private final PreBuiltAnalyzerProvider analyzerProvider;
|
private final Function<Version, Analyzer> create;
|
||||||
|
private final PreBuiltAnalyzerProvider current;
|
||||||
|
|
||||||
public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
|
/**
|
||||||
analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer);
|
* This constructor only exists to expose analyzers defined in {@link PreBuiltAnalyzers} as {@link PreBuiltAnalyzerProviderFactory}.
|
||||||
|
*/
|
||||||
|
PreBuiltAnalyzerProviderFactory(String name, PreBuiltAnalyzers preBuiltAnalyzer) {
|
||||||
|
super(name, new PreBuiltAnalyzersDelegateCache(name, preBuiltAnalyzer));
|
||||||
|
this.create = preBuiltAnalyzer::getAnalyzer;
|
||||||
|
current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, preBuiltAnalyzer.getAnalyzer(Version.CURRENT));
|
||||||
}
|
}
|
||||||
|
|
||||||
public AnalyzerProvider<?> create(String name, Settings settings) {
|
public PreBuiltAnalyzerProviderFactory(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Analyzer> create) {
|
||||||
Version indexVersion = Version.indexCreated(settings);
|
super(name, cache);
|
||||||
if (!Version.CURRENT.equals(indexVersion)) {
|
this.create = create;
|
||||||
PreBuiltAnalyzers preBuiltAnalyzers = PreBuiltAnalyzers.getOrDefault(name, null);
|
this.current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, create.apply(Version.CURRENT));
|
||||||
if (preBuiltAnalyzers != null) {
|
|
||||||
Analyzer analyzer = preBuiltAnalyzers.getAnalyzer(indexVersion);
|
|
||||||
return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return analyzerProvider;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AnalyzerProvider<?> get(IndexSettings indexSettings, Environment environment, String name, Settings settings)
|
public AnalyzerProvider<?> get(IndexSettings indexSettings,
|
||||||
throws IOException {
|
Environment environment,
|
||||||
return create(name, settings);
|
String name,
|
||||||
|
Settings settings) throws IOException {
|
||||||
|
Version versionCreated = Version.indexCreated(settings);
|
||||||
|
if (Version.CURRENT.equals(versionCreated) == false) {
|
||||||
|
return super.get(indexSettings, environment, name, settings);
|
||||||
|
} else {
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AnalyzerProvider<?> create(Version version) {
|
||||||
|
assert Version.CURRENT.equals(version) == false;
|
||||||
|
return new PreBuiltAnalyzerProvider(getName(), AnalyzerScope.INDICES, create.apply(version));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
List<Closeable> closeables = cache.values().stream()
|
||||||
|
.map(AnalyzerProvider::get)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
closeables.add(current.get());
|
||||||
|
IOUtils.close(closeables);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A special cache that closes the gap between PreBuiltAnalyzers and PreBuiltAnalyzerProviderFactory.
|
||||||
|
*
|
||||||
|
* This can be removed when all analyzers have been moved away from PreBuiltAnalyzers to
|
||||||
|
* PreBuiltAnalyzerProviderFactory either in server or analysis-common.
|
||||||
|
*/
|
||||||
|
static class PreBuiltAnalyzersDelegateCache implements PreBuiltCacheFactory.PreBuiltCache<AnalyzerProvider<?>> {
|
||||||
|
|
||||||
|
private final String name;
|
||||||
|
private final PreBuiltAnalyzers preBuiltAnalyzer;
|
||||||
|
|
||||||
|
private PreBuiltAnalyzersDelegateCache(String name, PreBuiltAnalyzers preBuiltAnalyzer) {
|
||||||
|
this.name = name;
|
||||||
|
this.preBuiltAnalyzer = preBuiltAnalyzer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AnalyzerProvider<?> get(Version version) {
|
||||||
|
return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, preBuiltAnalyzer.getAnalyzer(version));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void put(Version version, AnalyzerProvider<?> analyzerProvider) {
|
||||||
|
// No need to put, because we delegate in get() directly to PreBuiltAnalyzers which already caches.
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<AnalyzerProvider<?>> values() {
|
||||||
|
return preBuiltAnalyzer.getCache().values().stream()
|
||||||
|
// Wrap the analyzer instance in a PreBuiltAnalyzerProvider, this is what PreBuiltAnalyzerProviderFactory#close expects
|
||||||
|
// (other caches are not directly caching analyzers, but analyzer providers instead).
|
||||||
|
.map(analyzer -> new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer))
|
||||||
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
public Analyzer analyzer() {
|
|
||||||
return analyzerProvider.get();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,13 +33,18 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public abstract class PreConfiguredAnalysisComponent<T> implements AnalysisModule.AnalysisProvider<T> {
|
public abstract class PreConfiguredAnalysisComponent<T> implements AnalysisModule.AnalysisProvider<T> {
|
||||||
private final String name;
|
private final String name;
|
||||||
private final PreBuiltCacheFactory.PreBuiltCache<T> cache;
|
protected final PreBuiltCacheFactory.PreBuiltCache<T> cache;
|
||||||
|
|
||||||
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.CachingStrategy cache) {
|
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.CachingStrategy cache) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.cache = PreBuiltCacheFactory.getCache(cache);
|
this.cache = PreBuiltCacheFactory.getCache(cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.PreBuiltCache<T> cache) {
|
||||||
|
this.name = name;
|
||||||
|
this.cache = cache;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
|
public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
|
||||||
Version versionCreated = Version.indexCreated(settings);
|
Version versionCreated = Version.indexCreated(settings);
|
||||||
|
|
|
@ -43,7 +43,6 @@ import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
|
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
|
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
|
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
|
|
||||||
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
|
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
|
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
|
||||||
|
@ -59,9 +58,9 @@ import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
|
|
||||||
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
|
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
|
||||||
|
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||||
|
@ -73,7 +72,6 @@ import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
|
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
|
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
|
import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzerProvider;
|
|
||||||
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
|
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
|
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.StopAnalyzerProvider;
|
import org.elasticsearch.index.analysis.StopAnalyzerProvider;
|
||||||
|
@ -122,11 +120,12 @@ public final class AnalysisModule {
|
||||||
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
|
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
|
||||||
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
|
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
|
||||||
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);
|
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);
|
||||||
|
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);
|
||||||
|
|
||||||
analysisRegistry = new AnalysisRegistry(environment,
|
analysisRegistry = new AnalysisRegistry(environment,
|
||||||
charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(),
|
charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(),
|
||||||
analyzers.getRegistry(), normalizers.getRegistry(),
|
analyzers.getRegistry(), normalizers.getRegistry(),
|
||||||
preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
|
preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
|
||||||
}
|
}
|
||||||
|
|
||||||
HunspellService getHunspellService() {
|
HunspellService getHunspellService() {
|
||||||
|
@ -162,6 +161,16 @@ public final class AnalysisModule {
|
||||||
return tokenFilters;
|
return tokenFilters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Map<String, PreBuiltAnalyzerProviderFactory> setupPreBuiltAnalyzerProviderFactories(List<AnalysisPlugin> plugins) {
|
||||||
|
NamedRegistry<PreBuiltAnalyzerProviderFactory> preConfiguredCharFilters = new NamedRegistry<>("pre-built analyzer");
|
||||||
|
for (AnalysisPlugin plugin : plugins) {
|
||||||
|
for (PreBuiltAnalyzerProviderFactory factory : plugin.getPreBuiltAnalyzerProviderFactories()) {
|
||||||
|
preConfiguredCharFilters.register(factory.getName(), factory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return unmodifiableMap(preConfiguredCharFilters.getRegistry());
|
||||||
|
}
|
||||||
|
|
||||||
static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
|
static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
|
||||||
NamedRegistry<PreConfiguredCharFilter> preConfiguredCharFilters = new NamedRegistry<>("pre-configured char_filter");
|
NamedRegistry<PreConfiguredCharFilter> preConfiguredCharFilters = new NamedRegistry<>("pre-configured char_filter");
|
||||||
|
|
||||||
|
@ -232,12 +241,10 @@ public final class AnalysisModule {
|
||||||
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = new NamedRegistry<>("analyzer");
|
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = new NamedRegistry<>("analyzer");
|
||||||
analyzers.register("default", StandardAnalyzerProvider::new);
|
analyzers.register("default", StandardAnalyzerProvider::new);
|
||||||
analyzers.register("standard", StandardAnalyzerProvider::new);
|
analyzers.register("standard", StandardAnalyzerProvider::new);
|
||||||
analyzers.register("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
|
|
||||||
analyzers.register("simple", SimpleAnalyzerProvider::new);
|
analyzers.register("simple", SimpleAnalyzerProvider::new);
|
||||||
analyzers.register("stop", StopAnalyzerProvider::new);
|
analyzers.register("stop", StopAnalyzerProvider::new);
|
||||||
analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
|
analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
|
||||||
analyzers.register("keyword", KeywordAnalyzerProvider::new);
|
analyzers.register("keyword", KeywordAnalyzerProvider::new);
|
||||||
analyzers.register("pattern", PatternAnalyzerProvider::new);
|
|
||||||
analyzers.register("snowball", SnowballAnalyzerProvider::new);
|
analyzers.register("snowball", SnowballAnalyzerProvider::new);
|
||||||
analyzers.register("arabic", ArabicAnalyzerProvider::new);
|
analyzers.register("arabic", ArabicAnalyzerProvider::new);
|
||||||
analyzers.register("armenian", ArmenianAnalyzerProvider::new);
|
analyzers.register("armenian", ArmenianAnalyzerProvider::new);
|
||||||
|
@ -274,7 +281,6 @@ public final class AnalysisModule {
|
||||||
analyzers.register("swedish", SwedishAnalyzerProvider::new);
|
analyzers.register("swedish", SwedishAnalyzerProvider::new);
|
||||||
analyzers.register("turkish", TurkishAnalyzerProvider::new);
|
analyzers.register("turkish", TurkishAnalyzerProvider::new);
|
||||||
analyzers.register("thai", ThaiAnalyzerProvider::new);
|
analyzers.register("thai", ThaiAnalyzerProvider::new);
|
||||||
analyzers.register("fingerprint", FingerprintAnalyzerProvider::new);
|
|
||||||
analyzers.extractAndRegister(plugins, AnalysisPlugin::getAnalyzers);
|
analyzers.extractAndRegister(plugins, AnalysisPlugin::getAnalyzers);
|
||||||
return analyzers;
|
return analyzers;
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,10 +61,7 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.regex.Regex;
|
|
||||||
import org.elasticsearch.index.analysis.PatternAnalyzer;
|
|
||||||
import org.elasticsearch.index.analysis.SnowballAnalyzer;
|
import org.elasticsearch.index.analysis.SnowballAnalyzer;
|
||||||
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;
|
|
||||||
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
|
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
|
||||||
|
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -141,22 +138,6 @@ public enum PreBuiltAnalyzers {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
PATTERN(CachingStrategy.ELASTICSEARCH) {
|
|
||||||
@Override
|
|
||||||
protected Analyzer create(Version version) {
|
|
||||||
return new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
STANDARD_HTML_STRIP(CachingStrategy.ELASTICSEARCH) {
|
|
||||||
@Override
|
|
||||||
protected Analyzer create(Version version) {
|
|
||||||
final Analyzer analyzer = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
|
|
||||||
analyzer.setVersion(version.luceneVersion);
|
|
||||||
return analyzer;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
ARABIC {
|
ARABIC {
|
||||||
@Override
|
@Override
|
||||||
protected Analyzer create(Version version) {
|
protected Analyzer create(Version version) {
|
||||||
|
@ -484,7 +465,7 @@ public enum PreBuiltAnalyzers {
|
||||||
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
|
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
|
||||||
}
|
}
|
||||||
|
|
||||||
PreBuiltCacheFactory.PreBuiltCache<Analyzer> getCache() {
|
public PreBuiltCacheFactory.PreBuiltCache<Analyzer> getCache() {
|
||||||
return cache;
|
return cache;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,8 @@ package org.elasticsearch.indices.analysis;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -36,8 +38,12 @@ public class PreBuiltCacheFactory {
|
||||||
public enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
|
public enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
|
||||||
|
|
||||||
public interface PreBuiltCache<T> {
|
public interface PreBuiltCache<T> {
|
||||||
|
|
||||||
T get(Version version);
|
T get(Version version);
|
||||||
|
|
||||||
void put(Version version, T t);
|
void put(Version version, T t);
|
||||||
|
|
||||||
|
Collection<T> values();
|
||||||
}
|
}
|
||||||
|
|
||||||
private PreBuiltCacheFactory() {}
|
private PreBuiltCacheFactory() {}
|
||||||
|
@ -71,6 +77,11 @@ public class PreBuiltCacheFactory {
|
||||||
public void put(Version version, T model) {
|
public void put(Version version, T model) {
|
||||||
this.model = model;
|
this.model = model;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<T> values() {
|
||||||
|
return Collections.singleton(model);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -89,6 +100,11 @@ public class PreBuiltCacheFactory {
|
||||||
public void put(Version version, T model) {
|
public void put(Version version, T model) {
|
||||||
mapModel.put(version, model);
|
mapModel.put(version, model);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<T> values() {
|
||||||
|
return mapModel.values();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -107,5 +123,10 @@ public class PreBuiltCacheFactory {
|
||||||
public void put(org.elasticsearch.Version version, T model) {
|
public void put(org.elasticsearch.Version version, T model) {
|
||||||
mapModel.put(version.luceneVersion, model);
|
mapModel.put(version.luceneVersion, model);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<T> values() {
|
||||||
|
return mapModel.values();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
import org.elasticsearch.index.analysis.AnalyzerProvider;
|
import org.elasticsearch.index.analysis.AnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||||
|
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||||
|
@ -92,6 +93,13 @@ public interface AnalysisPlugin {
|
||||||
return emptyMap();
|
return emptyMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Override to add additional pre-configured {@link Analyzer}s.
|
||||||
|
*/
|
||||||
|
default List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
|
||||||
|
return emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Override to add additional pre-configured {@link CharFilter}s.
|
* Override to add additional pre-configured {@link CharFilter}s.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -121,7 +121,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
index = indexSettings.getIndex();
|
index = indexSettings.getIndex();
|
||||||
environment = TestEnvironment.newEnvironment(settings);
|
environment = TestEnvironment.newEnvironment(settings);
|
||||||
emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
|
emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
|
||||||
emptyMap(), emptyMap(), emptyMap());
|
emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
threadPool = new TestThreadPool("test");
|
threadPool = new TestThreadPool("test");
|
||||||
circuitBreakerService = new NoneCircuitBreakerService();
|
circuitBreakerService = new NoneCircuitBreakerService();
|
||||||
PageCacheRecycler pageCacheRecycler = new PageCacheRecycler(settings);
|
PageCacheRecycler pageCacheRecycler = new PageCacheRecycler(settings);
|
||||||
|
|
|
@ -41,6 +41,7 @@ import org.elasticsearch.test.IndexSettingsModule;
|
||||||
import org.elasticsearch.test.VersionUtils;
|
import org.elasticsearch.test.VersionUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static java.util.Collections.emptyMap;
|
import static java.util.Collections.emptyMap;
|
||||||
|
@ -48,6 +49,8 @@ import static java.util.Collections.singletonList;
|
||||||
import static java.util.Collections.singletonMap;
|
import static java.util.Collections.singletonMap;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.hamcrest.Matchers.instanceOf;
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.verify;
|
||||||
|
|
||||||
public class AnalysisRegistryTests extends ESTestCase {
|
public class AnalysisRegistryTests extends ESTestCase {
|
||||||
private AnalysisRegistry emptyRegistry;
|
private AnalysisRegistry emptyRegistry;
|
||||||
|
@ -58,7 +61,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
|
|
||||||
private static AnalysisRegistry emptyAnalysisRegistry(Settings settings) {
|
private static AnalysisRegistry emptyAnalysisRegistry(Settings settings) {
|
||||||
return new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
|
return new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
|
||||||
emptyMap(), emptyMap(), emptyMap());
|
emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IndexSettings indexSettingsOfCurrentVersion(Settings.Builder settings) {
|
private static IndexSettings indexSettingsOfCurrentVersion(Settings.Builder settings) {
|
||||||
|
@ -224,4 +227,16 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
indexAnalyzers.close();
|
indexAnalyzers.close();
|
||||||
indexAnalyzers.close();
|
indexAnalyzers.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testEnsureCloseInvocationProperlyDelegated() throws IOException {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
PreBuiltAnalyzerProviderFactory mock = mock(PreBuiltAnalyzerProviderFactory.class);
|
||||||
|
AnalysisRegistry registry = new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(),
|
||||||
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), Collections.singletonMap("key", mock));
|
||||||
|
|
||||||
|
registry.close();
|
||||||
|
verify(mock).close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,9 @@
|
||||||
package org.elasticsearch.search.fetch.subphase.highlight;
|
package org.elasticsearch.search.fetch.subphase.highlight;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.search.join.ScoreMode;
|
import org.apache.lucene.search.join.ScoreMode;
|
||||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchRequestBuilder;
|
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||||
|
@ -32,6 +34,8 @@ import org.elasticsearch.common.settings.Settings.Builder;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.common.xcontent.XContentType;
|
import org.elasticsearch.common.xcontent.XContentType;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
|
||||||
|
import org.elasticsearch.index.analysis.AnalyzerProvider;
|
||||||
import org.elasticsearch.index.query.AbstractQueryBuilder;
|
import org.elasticsearch.index.query.AbstractQueryBuilder;
|
||||||
import org.elasticsearch.index.query.IdsQueryBuilder;
|
import org.elasticsearch.index.query.IdsQueryBuilder;
|
||||||
import org.elasticsearch.index.query.MatchQueryBuilder;
|
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||||
|
@ -41,6 +45,8 @@ import org.elasticsearch.index.query.QueryBuilder;
|
||||||
import org.elasticsearch.index.query.QueryBuilders;
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
|
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
|
||||||
import org.elasticsearch.index.query.functionscore.RandomScoreFunctionBuilder;
|
import org.elasticsearch.index.query.functionscore.RandomScoreFunctionBuilder;
|
||||||
|
import org.elasticsearch.indices.analysis.AnalysisModule;
|
||||||
|
import org.elasticsearch.plugins.AnalysisPlugin;
|
||||||
import org.elasticsearch.plugins.Plugin;
|
import org.elasticsearch.plugins.Plugin;
|
||||||
import org.elasticsearch.rest.RestStatus;
|
import org.elasticsearch.rest.RestStatus;
|
||||||
import org.elasticsearch.search.SearchHit;
|
import org.elasticsearch.search.SearchHit;
|
||||||
|
@ -63,6 +69,7 @@ import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static java.util.Collections.singletonMap;
|
||||||
import static org.elasticsearch.client.Requests.searchRequest;
|
import static org.elasticsearch.client.Requests.searchRequest;
|
||||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
|
||||||
|
@ -106,7 +113,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||||
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class);
|
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testHighlightingWithStoredKeyword() throws IOException {
|
public void testHighlightingWithStoredKeyword() throws IOException {
|
||||||
|
@ -1599,8 +1606,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
assertAcked(prepareCreate("test")
|
assertAcked(prepareCreate("test")
|
||||||
.setSettings(Settings.builder()
|
.setSettings(Settings.builder()
|
||||||
.put(indexSettings())
|
.put(indexSettings())
|
||||||
.put("analysis.analyzer.my_analyzer.type", "pattern")
|
.put("analysis.analyzer.my_analyzer.type", "mock_whitespace")
|
||||||
.put("analysis.analyzer.my_analyzer.pattern", "\\s+")
|
|
||||||
.build())
|
.build())
|
||||||
.addMapping("type", "text", "type=text,analyzer=my_analyzer"));
|
.addMapping("type", "text", "type=text,analyzer=my_analyzer"));
|
||||||
ensureGreen();
|
ensureGreen();
|
||||||
|
@ -1611,7 +1617,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
SearchResponse response = client().prepareSearch("test")
|
SearchResponse response = client().prepareSearch("test")
|
||||||
.setQuery(QueryBuilders.matchQuery("text", "test"))
|
.setQuery(QueryBuilders.matchQuery("text", "test"))
|
||||||
.highlighter(new HighlightBuilder().field("text")).execute().actionGet();
|
.highlighter(new HighlightBuilder().field("text")).execute().actionGet();
|
||||||
// PatternAnalyzer will throw an exception if it is reset twice
|
// Mock tokenizer will throw an exception if it is reset twice
|
||||||
assertHitCount(response, 1L);
|
assertHitCount(response, 1L);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2976,4 +2982,22 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
|
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
|
||||||
|
return singletonMap("mock_whitespace", (indexSettings, environment, name, settings) -> {
|
||||||
|
return new AbstractIndexAnalyzerProvider<Analyzer>(indexSettings, name, settings) {
|
||||||
|
|
||||||
|
MockAnalyzer instance = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Analyzer get() {
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,7 +67,7 @@ public class WatcherPluginTests extends ESTestCase {
|
||||||
// ensure index module is not called, even if watches index is tried
|
// ensure index module is not called, even if watches index is tried
|
||||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(Watch.INDEX, settings);
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(Watch.INDEX, settings);
|
||||||
AnalysisRegistry registry = new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(), emptyMap(),
|
AnalysisRegistry registry = new AnalysisRegistry(TestEnvironment.newEnvironment(settings), emptyMap(), emptyMap(), emptyMap(),
|
||||||
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
IndexModule indexModule = new IndexModule(indexSettings, registry);
|
IndexModule indexModule = new IndexModule(indexSettings, registry);
|
||||||
// this will trip an assertion if the watcher indexing operation listener is null (which it is) but we try to add it
|
// this will trip an assertion if the watcher indexing operation listener is null (which it is) but we try to add it
|
||||||
watcher.onIndexModule(indexModule);
|
watcher.onIndexModule(indexModule);
|
||||||
|
|
Loading…
Reference in New Issue