Add version to prebuilt analyzers
This patch takes the version of the created index into account when a prebuilt analyzer is created. So, if an index was created with 0.90.4, the prebuilt analyzers will be the same as in the 0.90.4 release. One reason for this feature is the possibility of changing prebuilt analyzers such as the standard one. The patch tries to reuse analyzers as much as possible: if versions X.Y.Z and X.Y.A use the same Lucene analyzers, the same instance is reused in order to avoid creating redundant Lucene analyzer instances. Closes #3790
This commit is contained in:
parent
c9dab6991e
commit
ec0880df45
|
@ -252,7 +252,9 @@ public class MetaDataCreateIndexService extends AbstractComponent {
|
|||
indexSettingsBuilder.put(SETTING_AUTO_EXPAND_REPLICAS, settings.get(SETTING_AUTO_EXPAND_REPLICAS));
|
||||
}
|
||||
|
||||
if (indexSettingsBuilder.get(SETTING_VERSION_CREATED) == null) {
|
||||
indexSettingsBuilder.put(SETTING_VERSION_CREATED, version);
|
||||
}
|
||||
indexSettingsBuilder.put(SETTING_UUID, Strings.randomBase64UUID());
|
||||
|
||||
Settings actualIndexSettings = indexSettingsBuilder.build();
|
||||
|
|
|
@ -387,7 +387,7 @@ public class AnalysisModule extends AbstractModule {
|
|||
}
|
||||
|
||||
|
||||
// go over the tokenizers in the bindings and register the ones that are not configured
|
||||
// go over the analyzers in the bindings and register the ones that are not configured
|
||||
for (Map.Entry<String, Class<? extends AnalyzerProvider>> entry : analyzersBindings.analyzers.entrySet()) {
|
||||
String analyzerName = entry.getKey();
|
||||
Class<? extends AnalyzerProvider> clazz = entry.getValue();
|
||||
|
@ -408,7 +408,6 @@ public class AnalysisModule extends AbstractModule {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
bind(AnalysisService.class).in(Scopes.SINGLETON);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,8 @@ package org.elasticsearch.index.analysis;
|
|||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.component.CloseableComponent;
|
||||
|
@ -188,14 +190,13 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
|||
if (indicesAnalysisService != null) {
|
||||
for (Map.Entry<String, PreBuiltAnalyzerProviderFactory> entry : indicesAnalysisService.analyzerProviderFactories().entrySet()) {
|
||||
String name = entry.getKey();
|
||||
Version indexVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
|
||||
if (!analyzerProviders.containsKey(name)) {
|
||||
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
name = Strings.toCamelCase(entry.getKey());
|
||||
if (!name.equals(entry.getKey())) {
|
||||
if (!analyzerProviders.containsKey(name)) {
|
||||
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
|
||||
}
|
||||
String camelCaseName = Strings.toCamelCase(name);
|
||||
if (!camelCaseName.equals(entry.getKey()) && !analyzerProviders.containsKey(camelCaseName)) {
|
||||
analyzerProviders.put(camelCaseName, entry.getValue().create(name, ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,12 @@
|
|||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -30,15 +35,17 @@ public class PreBuiltAnalyzerProviderFactory implements AnalyzerProviderFactory
|
|||
private final PreBuiltAnalyzerProvider analyzerProvider;
|
||||
|
||||
public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
|
||||
this(new PreBuiltAnalyzerProvider(name, scope, analyzer));
|
||||
}
|
||||
|
||||
public PreBuiltAnalyzerProviderFactory(PreBuiltAnalyzerProvider analyzerProvider) {
|
||||
this.analyzerProvider = analyzerProvider;
|
||||
analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AnalyzerProvider create(String name, Settings settings) {
|
||||
Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
|
||||
if (!Version.CURRENT.equals(indexVersion)) {
|
||||
Analyzer analyzer = PreBuiltAnalyzers.valueOf(name.toUpperCase(Locale.ROOT)).getAnalyzer(indexVersion);
|
||||
return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
|
||||
}
|
||||
|
||||
return analyzerProvider;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,66 +22,34 @@ package org.elasticsearch.indices.analysis;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
|
||||
import org.apache.lucene.analysis.commongrams.*;
|
||||
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
|
||||
import org.apache.lucene.analysis.core.*;
|
||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
import org.apache.lucene.analysis.cz.CzechStemFilter;
|
||||
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||
import org.apache.lucene.analysis.de.GermanStemFilter;
|
||||
import org.apache.lucene.analysis.el.GreekAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.en.KStemFilter;
|
||||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
||||
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
||||
import org.apache.lucene.analysis.fa.PersianAnalyzer;
|
||||
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
|
||||
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
||||
import org.apache.lucene.analysis.ga.IrishAnalyzer;
|
||||
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
||||
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
||||
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
||||
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
||||
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
|
||||
import org.apache.lucene.analysis.miscellaneous.*;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||
import org.apache.lucene.analysis.nl.DutchStemFilter;
|
||||
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
||||
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
|
||||
import org.apache.lucene.analysis.pattern.PatternTokenizer;
|
||||
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.*;
|
||||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
@ -91,6 +59,7 @@ import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
|||
import org.elasticsearch.index.analysis.*;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS;
|
||||
|
@ -101,7 +70,6 @@ import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_
|
|||
public class IndicesAnalysisService extends AbstractComponent {
|
||||
|
||||
private final Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = ConcurrentCollections.newConcurrentMap();
|
||||
|
||||
private final Map<String, PreBuiltTokenizerFactoryFactory> tokenizerFactories = ConcurrentCollections.newConcurrentMap();
|
||||
private final Map<String, PreBuiltTokenFilterFactoryFactory> tokenFilterFactories = ConcurrentCollections.newConcurrentMap();
|
||||
private final Map<String, PreBuiltCharFilterFactoryFactory> charFilterFactories = ConcurrentCollections.newConcurrentMap();
|
||||
|
@ -114,52 +82,10 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
public IndicesAnalysisService(Settings settings) {
|
||||
super(settings);
|
||||
|
||||
StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Lucene.ANALYZER_VERSION);
|
||||
analyzerProviderFactories.put("default", new PreBuiltAnalyzerProviderFactory("default", AnalyzerScope.INDICES, standardAnalyzer));
|
||||
analyzerProviderFactories.put("standard", new PreBuiltAnalyzerProviderFactory("standard", AnalyzerScope.INDICES, standardAnalyzer));
|
||||
analyzerProviderFactories.put("keyword", new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDICES, new KeywordAnalyzer()));
|
||||
analyzerProviderFactories.put("stop", new PreBuiltAnalyzerProviderFactory("stop", AnalyzerScope.INDICES, new StopAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("whitespace", new PreBuiltAnalyzerProviderFactory("whitespace", AnalyzerScope.INDICES, new WhitespaceAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("simple", new PreBuiltAnalyzerProviderFactory("simple", AnalyzerScope.INDICES, new SimpleAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("classic", new PreBuiltAnalyzerProviderFactory("classic", AnalyzerScope.INDICES, new ClassicAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
|
||||
// extended ones
|
||||
analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
|
||||
analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
|
||||
analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
|
||||
analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("basque", new PreBuiltAnalyzerProviderFactory("basque", AnalyzerScope.INDICES, new BasqueAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("bulgarian", new PreBuiltAnalyzerProviderFactory("bulgarian", AnalyzerScope.INDICES, new BulgarianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("catalan", new PreBuiltAnalyzerProviderFactory("catalan", AnalyzerScope.INDICES, new CatalanAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer()));
|
||||
analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new CJKAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("danish", new PreBuiltAnalyzerProviderFactory("danish", AnalyzerScope.INDICES, new DanishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("english", new PreBuiltAnalyzerProviderFactory("english", AnalyzerScope.INDICES, new EnglishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("finnish", new PreBuiltAnalyzerProviderFactory("finnish", AnalyzerScope.INDICES, new FinnishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("galician", new PreBuiltAnalyzerProviderFactory("galician", AnalyzerScope.INDICES, new GalicianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("german", new PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("irish", new PreBuiltAnalyzerProviderFactory("irish", AnalyzerScope.INDICES, new IrishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("latvian", new PreBuiltAnalyzerProviderFactory("latvian", AnalyzerScope.INDICES, new LatvianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("portuguese", new PreBuiltAnalyzerProviderFactory("portuguese", AnalyzerScope.INDICES, new PortugueseAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("romanian", new PreBuiltAnalyzerProviderFactory("romanian", AnalyzerScope.INDICES, new RomanianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("spanish", new PreBuiltAnalyzerProviderFactory("spanish", AnalyzerScope.INDICES, new SpanishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("swedish", new PreBuiltAnalyzerProviderFactory("swedish", AnalyzerScope.INDICES, new SwedishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("turkish", new PreBuiltAnalyzerProviderFactory("turkish", AnalyzerScope.INDICES, new TurkishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
|
||||
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
|
||||
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
|
||||
}
|
||||
|
||||
// Base Tokenizers
|
||||
tokenizerFactories.put("standard", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
|
||||
|
@ -771,7 +697,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
}
|
||||
|
||||
public boolean hasAnalyzer(String name) {
|
||||
return analyzer(name) != null;
|
||||
return analyzerProviderFactories.containsKey(name);
|
||||
}
|
||||
|
||||
public Analyzer analyzer(String name) {
|
||||
|
|
|
@ -0,0 +1,439 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.indices.analysis;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Maps;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
||||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.core.SimpleAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||
import org.apache.lucene.analysis.el.GreekAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
||||
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
||||
import org.apache.lucene.analysis.fa.PersianAnalyzer;
|
||||
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.apache.lucene.analysis.ga.IrishAnalyzer;
|
||||
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
||||
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
||||
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
||||
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
||||
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
|
||||
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
|
||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
||||
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
||||
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public enum PreBuiltAnalyzers {
|
||||
|
||||
STANDARD() {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new StandardAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
DEFAULT {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
// by calling get analyzer we are ensuring reuse of the same STANDARD analyzer for DEFAULT!
|
||||
// this call does not create a new instance
|
||||
return STANDARD.getAnalyzer(version);
|
||||
}
|
||||
},
|
||||
|
||||
KEYWORD(CachingStrategy.ONE) {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new KeywordAnalyzer();
|
||||
}
|
||||
},
|
||||
|
||||
STOP {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new StopAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
WHITESPACE {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new WhitespaceAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
SIMPLE {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new SimpleAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
CLASSIC {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new ClassicAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
SNOWBALL {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new SnowballAnalyzer(version.luceneVersion, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
}
|
||||
},
|
||||
|
||||
PATTERN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new PatternAnalyzer(version.luceneVersion, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
}
|
||||
},
|
||||
|
||||
STANDARD_HTML_STRIP {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new StandardHtmlStripAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
ARABIC {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new ArabicAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
ARMENIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new ArmenianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
BASQUE {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new BasqueAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
BRAZILIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new BrazilianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
BULGARIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new BulgarianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
CATALAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new CatalanAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
CHINESE(CachingStrategy.ONE) {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new ChineseAnalyzer();
|
||||
}
|
||||
},
|
||||
|
||||
CJK {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new CJKAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
CZECH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new CzechAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
DUTCH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new DutchAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
DANISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new DanishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
ENGLISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new EnglishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
FINNISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new FinnishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
FRENCH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new FrenchAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
GALICIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new GalicianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
GERMAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new GermanAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
GREEK {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new GreekAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
HINDI {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new HindiAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
HUNGARIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new HungarianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
INDONESIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new IndonesianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
IRISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new IrishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
ITALIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new ItalianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
LATVIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new LatvianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
NORWEGIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new NorwegianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
PERSIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new PersianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
PORTUGUESE {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new PortugueseAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
ROMANIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new RomanianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
RUSSIAN {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new RussianAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
SPANISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new SpanishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
SWEDISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new SwedishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
TURKISH {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new TurkishAnalyzer(version.luceneVersion);
|
||||
}
|
||||
},
|
||||
|
||||
THAI {
|
||||
@Override
|
||||
protected Analyzer create(Version version) {
|
||||
return new ThaiAnalyzer(version.luceneVersion);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* The strategy of caching the analyzer
|
||||
*
|
||||
* ONE Exactly one version is stored. Useful for analyzers which do not store version information
|
||||
* LUCENE Exactly one version for each lucene version is stored. Useful to prevent different analyzers with the same version
|
||||
* ELASTICSEARCH Exactly one version per elasticsearch version is stored. Useful if you change an analyzer between elasticsearch releases, when the lucene version does not change
|
||||
*/
|
||||
private static enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
|
||||
|
||||
/** Decides when an already created analyzer may be reused; fixed per enum constant. */
private final CachingStrategy cachingStrategy;

/** Analyzers created so far, keyed by the elasticsearch version they were requested for. */
protected final Map<Version, Analyzer> cachedAnalyzers = Maps.newHashMapWithExpectedSize(2);

/**
 * Creates a constant that caches one analyzer per lucene version.
 */
PreBuiltAnalyzers() {
    this(CachingStrategy.LUCENE);
}

/**
 * Creates a constant with an explicit caching strategy.
 *
 * @param cachingStrategy how cached analyzers are matched against a requested version
 */
PreBuiltAnalyzers(CachingStrategy cachingStrategy) {
    this.cachingStrategy = cachingStrategy;
}
|
||||
|
||||
abstract protected Analyzer create(Version version);
|
||||
|
||||
public Map<Version, Analyzer> getCachedAnalyzers() {
|
||||
return ImmutableMap.copyOf(cachedAnalyzers);
|
||||
}
|
||||
|
||||
public synchronized Analyzer getAnalyzer(Version version) {
|
||||
Analyzer analyzer = getCachedAnalyzer(version);
|
||||
if (analyzer == null) {
|
||||
analyzer = this.create(version);
|
||||
}
|
||||
|
||||
if (!cachedAnalyzers.containsKey(version)) {
|
||||
cachedAnalyzers.put(version, analyzer);
|
||||
}
|
||||
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
private Analyzer getCachedAnalyzer(Version version) {
|
||||
switch (this.cachingStrategy) {
|
||||
case ONE:
|
||||
// method to return the first found analyzer in the cache
|
||||
if (cachedAnalyzers.size() > 0) {
|
||||
return (Analyzer) cachedAnalyzers.values().toArray()[0];
|
||||
}
|
||||
break;
|
||||
case LUCENE:
|
||||
// find already cached analyzers with the same lucene version
|
||||
for (Version elasticsearchVersion : cachedAnalyzers.keySet()) {
|
||||
if (elasticsearchVersion.luceneVersion.equals(version.luceneVersion)) {
|
||||
return cachedAnalyzers.get(elasticsearchVersion);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ELASTICSEARCH:
|
||||
// check only for the same es version
|
||||
if (cachedAnalyzers.containsKey(version)) {
|
||||
return cachedAnalyzers.get(version);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new ElasticSearchException("No action configured for caching strategy[" + this.cachingStrategy + "]");
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -24,11 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.inject.Injector;
|
||||
import org.elasticsearch.common.inject.ModulesBuilder;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
@ -45,21 +47,35 @@ import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
|
|||
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AnalysisModuleTests extends ElasticsearchTestCase {
|
||||
|
||||
private Injector injector;
|
||||
|
||||
public AnalysisService getAnalysisService(Settings settings) {
|
||||
Index index = new Index("test");
|
||||
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
|
||||
injector = new ModulesBuilder().add(
|
||||
new IndexSettingsModule(index, settings),
|
||||
new IndexNameModule(index),
|
||||
new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
|
||||
.createChildInjector(parentInjector);
|
||||
|
||||
return injector.getInstance(AnalysisService.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleConfigurationJson() {
|
||||
Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/test1.json").build();
|
||||
|
@ -79,6 +95,38 @@ public class AnalysisModuleTests extends ElasticsearchTestCase {
|
|||
assertTokenFilter("arabic_normalization", ArabicNormalizationFilter.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVersionedAnalyzers() throws Exception {
|
||||
Settings settings2 = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/test1.yml")
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0).build();
|
||||
AnalysisService analysisService2 = getAnalysisService(settings2);
|
||||
|
||||
// indicesanalysisservice always has the current version
|
||||
IndicesAnalysisService indicesAnalysisService2 = injector.getInstance(IndicesAnalysisService.class);
|
||||
assertThat(indicesAnalysisService2.analyzer("default"), is(instanceOf(NamedAnalyzer.class)));
|
||||
NamedAnalyzer defaultNamedAnalyzer = (NamedAnalyzer) indicesAnalysisService2.analyzer("default");
|
||||
assertThat(defaultNamedAnalyzer.analyzer(), is(instanceOf(StandardAnalyzer.class)));
|
||||
assertLuceneAnalyzerVersion(Version.CURRENT.luceneVersion, defaultNamedAnalyzer.analyzer());
|
||||
|
||||
// analysis service has the expected version
|
||||
assertThat(analysisService2.analyzer("standard").analyzer(), is(instanceOf(StandardAnalyzer.class)));
|
||||
assertLuceneAnalyzerVersion(Version.V_0_90_0.luceneVersion, analysisService2.analyzer("standard").analyzer());
|
||||
assertLuceneAnalyzerVersion(Version.V_0_90_0.luceneVersion, analysisService2.analyzer("thai").analyzer());
|
||||
}
|
||||
|
||||
// ugly reflection based hack to extract the lucene version from an analyzer
|
||||
private void assertLuceneAnalyzerVersion(org.apache.lucene.util.Version luceneVersion, Analyzer analyzer) throws Exception {
|
||||
Field field = analyzer.getClass().getSuperclass().getDeclaredField("matchVersion");
|
||||
boolean currentAccessible = field.isAccessible();
|
||||
field.setAccessible(true);
|
||||
Object obj = field.get(analyzer);
|
||||
field.setAccessible(currentAccessible);
|
||||
|
||||
assertThat(obj, instanceOf(org.apache.lucene.util.Version.class));
|
||||
org.apache.lucene.util.Version analyzerVersion = (org.apache.lucene.util.Version) obj;
|
||||
assertThat(analyzerVersion, is(luceneVersion));
|
||||
}
|
||||
|
||||
private void assertTokenFilter(String name, Class clazz) {
|
||||
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(ImmutableSettings.settingsBuilder().build());
|
||||
TokenFilterFactory tokenFilter = analysisService.tokenFilter(name);
|
||||
|
@ -88,16 +136,7 @@ public class AnalysisModuleTests extends ElasticsearchTestCase {
|
|||
}
|
||||
|
||||
private void testSimpleConfiguration(Settings settings) {
|
||||
Index index = new Index("test");
|
||||
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
|
||||
Injector injector = new ModulesBuilder().add(
|
||||
new IndexSettingsModule(index, settings),
|
||||
new IndexNameModule(index),
|
||||
new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
|
||||
.createChildInjector(parentInjector);
|
||||
|
||||
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
|
||||
|
||||
AnalysisService analysisService = getAnalysisService(settings);
|
||||
Analyzer analyzer = analysisService.analyzer("custom1").analyzer();
|
||||
|
||||
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
|
||||
|
|
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
|
||||
import org.elasticsearch.test.AbstractIntegrationTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PreBuiltAnalyzerIntegrationTests extends AbstractIntegrationTest {
|
||||
|
||||
@Test
|
||||
public void testThatPreBuiltAnalyzersAreNotClosedOnIndexClose() throws Exception {
|
||||
Map<PreBuiltAnalyzers, List<Version>> loadedAnalyzers = Maps.newHashMap();
|
||||
|
||||
List<String> indexNames = Lists.newArrayList();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
String indexName = randomAsciiOfLength(10).toLowerCase(Locale.ROOT);
|
||||
indexNames.add(indexName);
|
||||
|
||||
int randomInt = randomInt(PreBuiltAnalyzers.values().length-1);
|
||||
PreBuiltAnalyzers preBuiltAnalyzer = PreBuiltAnalyzers.values()[randomInt];
|
||||
String name = preBuiltAnalyzer.name().toLowerCase(Locale.ROOT);
|
||||
|
||||
Version randomVersion = randomVersion();
|
||||
if (!loadedAnalyzers.containsKey(preBuiltAnalyzer)) {
|
||||
loadedAnalyzers.put(preBuiltAnalyzer, Lists.<Version>newArrayList());
|
||||
}
|
||||
loadedAnalyzers.get(preBuiltAnalyzer).add(randomVersion);
|
||||
|
||||
final XContentBuilder mapping = jsonBuilder().startObject()
|
||||
.startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("foo")
|
||||
.field("type", "string")
|
||||
.field("analyzer", name)
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject();
|
||||
|
||||
Settings versionSettings = randomSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion).build();
|
||||
client().admin().indices().prepareCreate(indexName).addMapping("type", mapping).setSettings(versionSettings).get();
|
||||
}
|
||||
|
||||
ensureGreen();
|
||||
|
||||
// index some amount of data
|
||||
for (int i = 0; i < 100; i++) {
|
||||
String randomIndex = indexNames.get(randomInt(indexNames.size()-1));
|
||||
String randomId = randomInt() + "";
|
||||
|
||||
Map<String, Object> data = Maps.newHashMap();
|
||||
data.put("foo", randomAsciiOfLength(50));
|
||||
|
||||
index(randomIndex, "type", randomId, data);
|
||||
}
|
||||
|
||||
refresh();
|
||||
|
||||
// close some of the indices
|
||||
int amountOfIndicesToClose = randomInt(10-1);
|
||||
for (int i = 0; i < amountOfIndicesToClose; i++) {
|
||||
String indexName = indexNames.get(i);
|
||||
client().admin().indices().prepareClose(indexName).execute().actionGet();
|
||||
}
|
||||
|
||||
ensureGreen();
|
||||
|
||||
// check that all above configured analyzers have been loaded
|
||||
assertThatAnalyzersHaveBeenLoaded(loadedAnalyzers);
|
||||
|
||||
// check that all of the prebuiltanalyzers are still open
|
||||
for (PreBuiltAnalyzers preBuiltAnalyzer : PreBuiltAnalyzers.values()) {
|
||||
assertLuceneAnalyzerIsNotClosed(preBuiltAnalyzer);
|
||||
}
|
||||
}
|
||||
|
||||
private void assertThatAnalyzersHaveBeenLoaded(Map<PreBuiltAnalyzers, List<Version>> expectedLoadedAnalyzers) {
|
||||
for (Map.Entry<PreBuiltAnalyzers, List<Version>> entry : expectedLoadedAnalyzers.entrySet()) {
|
||||
Map<Version, Analyzer> cachedAnalyzers = entry.getKey().getCachedAnalyzers();
|
||||
assertThat(cachedAnalyzers.keySet(), hasItems(entry.getValue().toArray(new Version[]{})));
|
||||
/*for (Version expectedVersion : entry.getValue()) {
|
||||
assertThat(cachedAnalyzers, contains(ex))
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
// the close() method of a lucene analyzer sets the storedValue field to null
|
||||
// we simply check this via reflection - ugly but works
|
||||
private void assertLuceneAnalyzerIsNotClosed(PreBuiltAnalyzers preBuiltAnalyzer) throws IllegalAccessException, NoSuchFieldException {
|
||||
|
||||
for (Map.Entry<Version, Analyzer> luceneAnalyzerEntry : preBuiltAnalyzer.getCachedAnalyzers().entrySet()) {
|
||||
Field field = getFieldFromClass("storedValue", luceneAnalyzerEntry.getValue());
|
||||
boolean currentAccessible = field.isAccessible();
|
||||
field.setAccessible(true);
|
||||
Object storedValue = field.get(preBuiltAnalyzer.getAnalyzer(luceneAnalyzerEntry.getKey()));
|
||||
field.setAccessible(currentAccessible);
|
||||
|
||||
assertThat(String.format(Locale.ROOT, "Analyzer %s in version %s seems to be closed", preBuiltAnalyzer.name(), luceneAnalyzerEntry.getKey()), storedValue, is(notNullValue()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches for a field until it finds, loops through all superclasses
|
||||
*/
|
||||
private Field getFieldFromClass(String fieldName, Object obj) {
|
||||
Field field = null;
|
||||
boolean storedValueFieldFound = false;
|
||||
Class clazz = obj.getClass();
|
||||
while (!storedValueFieldFound) {
|
||||
try {
|
||||
field = clazz.getDeclaredField(fieldName);
|
||||
storedValueFieldFound = true;
|
||||
} catch (NoSuchFieldException e) {
|
||||
clazz = clazz.getSuperclass();
|
||||
}
|
||||
|
||||
if (Object.class.equals(clazz)) throw new RuntimeException("Could not find storedValue field in class" + clazz);
|
||||
}
|
||||
|
||||
return field;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.Matchers.not;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PreBuiltAnalyzerProviderFactoryTests extends ElasticsearchTestCase {
|
||||
|
||||
@Test
|
||||
public void testVersioningInFactoryProvider() throws Exception {
|
||||
PreBuiltAnalyzerProviderFactory factory = new PreBuiltAnalyzerProviderFactory("default", AnalyzerScope.INDEX, PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT));
|
||||
|
||||
AnalyzerProvider currentAnalyzerProvider = factory.create("default", ImmutableSettings.Builder.EMPTY_SETTINGS);
|
||||
AnalyzerProvider former090AnalyzerProvider = factory.create("default", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0).build());
|
||||
AnalyzerProvider currentAnalyzerProviderReference = factory.create("default", ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
|
||||
|
||||
// would love to access the version inside of the lucene analyzer, but that is not possible...
|
||||
assertThat(currentAnalyzerProvider, is(currentAnalyzerProviderReference));
|
||||
assertThat(currentAnalyzerProvider, is(not(former090AnalyzerProvider)));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.mapper.MapperTestUtils;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PreBuiltAnalyzerTests extends ElasticsearchTestCase {
|
||||
|
||||
@Test
|
||||
public void testThatDefaultAndStandardAnalyzerAreTheSameInstance() {
|
||||
Analyzer currentStandardAnalyzer = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT);
|
||||
Analyzer currentDefaultAnalyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.CURRENT);
|
||||
|
||||
// special case, these two are the same instance
|
||||
assertThat(currentDefaultAnalyzer, is(currentStandardAnalyzer));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatInstancesAreTheSameAlwaysForKeywordAnalyzer() {
|
||||
assertThat(PreBuiltAnalyzers.KEYWORD.getAnalyzer(Version.CURRENT),
|
||||
is(PreBuiltAnalyzers.KEYWORD.getAnalyzer(Version.V_0_18_0)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatInstancesAreCachedAndReused() {
|
||||
assertThat(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT),
|
||||
is(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT)));
|
||||
assertThat(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_0_18_0),
|
||||
is(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_0_18_0)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatInstancesWithSameLuceneVersionAreReused() {
|
||||
// both are lucene 4.4 and should return the same instance
|
||||
assertThat(PreBuiltAnalyzers.CATALAN.getAnalyzer(Version.V_0_90_5),
|
||||
is(PreBuiltAnalyzers.CATALAN.getAnalyzer(Version.V_0_90_6)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatAnalyzersAreUsedInMapping() throws IOException {
|
||||
int randomInt = randomInt(PreBuiltAnalyzers.values().length-1);
|
||||
PreBuiltAnalyzers randomPreBuiltAnalyzer = PreBuiltAnalyzers.values()[randomInt];
|
||||
String analyzerName = randomPreBuiltAnalyzer.name().toLowerCase(Locale.ROOT);
|
||||
|
||||
Version randomVersion = randomVersion();
|
||||
Settings indexSettings = ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion).build();
|
||||
|
||||
NamedAnalyzer namedAnalyzer = new PreBuiltAnalyzerProvider(analyzerName, AnalyzerScope.INDEX, randomPreBuiltAnalyzer.getAnalyzer(randomVersion)).get();
|
||||
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", "string").field("analyzer", analyzerName).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
DocumentMapper docMapper = MapperTestUtils.newParser(indexSettings).parse(mapping);
|
||||
|
||||
FieldMapper fieldMapper = docMapper.mappers().name("field").mapper();
|
||||
assertThat(fieldMapper.searchAnalyzer(), instanceOf(NamedAnalyzer.class));
|
||||
NamedAnalyzer fieldMapperNamedAnalyzer = (NamedAnalyzer) fieldMapper.searchAnalyzer();
|
||||
|
||||
assertThat(fieldMapperNamedAnalyzer.analyzer(), is(namedAnalyzer.analyzer()));
|
||||
}
|
||||
}
|
|
@ -48,6 +48,11 @@ index :
|
|||
custom5 :
|
||||
tokenizer : standard
|
||||
char_filter : [my_mapping]
|
||||
custom6 :
|
||||
type : standard
|
||||
custom7 :
|
||||
type : standard
|
||||
version: 3.6
|
||||
czechAnalyzerWithStemmer :
|
||||
tokenizer : standard
|
||||
filter : [standard, lowercase, stop, czech_stem]
|
||||
|
|
|
@ -48,7 +48,7 @@ public class MapperTestUtils {
|
|||
}
|
||||
|
||||
public static DocumentMapperParser newParser(Settings indexSettings) {
|
||||
return new DocumentMapperParser(new Index("test"), indexSettings, newAnalysisService(), new PostingsFormatService(new Index("test")),
|
||||
return new DocumentMapperParser(new Index("test"), indexSettings, newAnalysisService(indexSettings), new PostingsFormatService(new Index("test")),
|
||||
new DocValuesFormatService(new Index("test")), newSimilarityLookupService());
|
||||
}
|
||||
|
||||
|
@ -58,11 +58,15 @@ public class MapperTestUtils {
|
|||
}
|
||||
|
||||
public static AnalysisService newAnalysisService() {
|
||||
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(ImmutableSettings.Builder.EMPTY_SETTINGS), new EnvironmentModule(new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector();
|
||||
return newAnalysisService(ImmutableSettings.Builder.EMPTY_SETTINGS);
|
||||
}
|
||||
|
||||
public static AnalysisService newAnalysisService(Settings indexSettings) {
|
||||
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(indexSettings), new EnvironmentModule(new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector();
|
||||
Injector injector = new ModulesBuilder().add(
|
||||
new IndexSettingsModule(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS),
|
||||
new IndexSettingsModule(new Index("test"), indexSettings),
|
||||
new IndexNameModule(new Index("test")),
|
||||
new AnalysisModule(ImmutableSettings.Builder.EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class))).createChildInjector(parentInjector);
|
||||
new AnalysisModule(indexSettings, parentInjector.getInstance(IndicesAnalysisService.class))).createChildInjector(parentInjector);
|
||||
|
||||
return injector.getInstance(AnalysisService.class);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue