Add version to prebuilt analyzers

This patch takes the version of the created index into account when a
prebuilt analyzer is created.
So, if an index was created with 0.90.4, then the prebuilt analyzers
will be the same as in the 0.90.4 release.

One reason for this feature is to make it possible to change prebuilt
analyzers like the standard one.

The patch tries to reuse analyzers as much as possible. So if versions
X.Y.Z and X.Y.A use the same Lucene analyzers, the same instance is
reused in order to avoid creating redundant Lucene analyzer instances.

Closes #3790
This commit is contained in:
Alexander Reelsen 2013-10-26 13:16:33 +02:00
parent c9dab6991e
commit ec0880df45
12 changed files with 841 additions and 114 deletions

View File

@ -252,7 +252,9 @@ public class MetaDataCreateIndexService extends AbstractComponent {
indexSettingsBuilder.put(SETTING_AUTO_EXPAND_REPLICAS, settings.get(SETTING_AUTO_EXPAND_REPLICAS));
}
if (indexSettingsBuilder.get(SETTING_VERSION_CREATED) == null) {
indexSettingsBuilder.put(SETTING_VERSION_CREATED, version);
}
indexSettingsBuilder.put(SETTING_UUID, Strings.randomBase64UUID());
Settings actualIndexSettings = indexSettingsBuilder.build();

View File

@ -387,7 +387,7 @@ public class AnalysisModule extends AbstractModule {
}
// go over the tokenizers in the bindings and register the ones that are not configured
// go over the analyzers in the bindings and register the ones that are not configured
for (Map.Entry<String, Class<? extends AnalyzerProvider>> entry : analyzersBindings.analyzers.entrySet()) {
String analyzerName = entry.getKey();
Class<? extends AnalyzerProvider> clazz = entry.getValue();
@ -408,7 +408,6 @@ public class AnalysisModule extends AbstractModule {
}
}
bind(AnalysisService.class).in(Scopes.SINGLETON);
}

View File

@ -22,6 +22,8 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.component.CloseableComponent;
@ -188,14 +190,13 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
if (indicesAnalysisService != null) {
for (Map.Entry<String, PreBuiltAnalyzerProviderFactory> entry : indicesAnalysisService.analyzerProviderFactories().entrySet()) {
String name = entry.getKey();
Version indexVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
if (!analyzerProviders.containsKey(name)) {
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.Builder.EMPTY_SETTINGS));
}
name = Strings.toCamelCase(entry.getKey());
if (!name.equals(entry.getKey())) {
if (!analyzerProviders.containsKey(name)) {
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.Builder.EMPTY_SETTINGS));
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
}
String camelCaseName = Strings.toCamelCase(name);
if (!camelCaseName.equals(entry.getKey()) && !analyzerProviders.containsKey(camelCaseName)) {
analyzerProviders.put(camelCaseName, entry.getValue().create(name, ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
}
}
}

View File

@ -20,7 +20,12 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import java.util.Locale;
/**
*
@ -30,15 +35,17 @@ public class PreBuiltAnalyzerProviderFactory implements AnalyzerProviderFactory
private final PreBuiltAnalyzerProvider analyzerProvider;
public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
this(new PreBuiltAnalyzerProvider(name, scope, analyzer));
}
public PreBuiltAnalyzerProviderFactory(PreBuiltAnalyzerProvider analyzerProvider) {
this.analyzerProvider = analyzerProvider;
analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer);
}
/**
 * Returns the analyzer provider to use for an index, based on the version the
 * index was created with.
 *
 * The version is read from {@link IndexMetaData#SETTING_VERSION_CREATED} in the
 * given settings, with {@link Version#CURRENT} passed as the default. When the
 * index version equals the current version, the provider built in the
 * constructor is returned. Otherwise the analyzer is obtained from the matching
 * {@link PreBuiltAnalyzers} constant for that version and wrapped in a new
 * provider.
 *
 * @param name     the analyzer name; upper-cased with {@link Locale#ROOT} to find the enum constant
 * @param settings settings that may carry the index creation version
 * @return a provider for the requested analyzer
 */
@Override
public AnalyzerProvider create(String name, Settings settings) {
    Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
    if (Version.CURRENT.equals(indexVersion)) {
        return analyzerProvider;
    }

    // Look the constant up by hand instead of calling valueOf(): valueOf() throws
    // IllegalArgumentException for names that are not enum constants, which would
    // break index creation for any factory registered under another name.
    // NOTE(review): assumes such factories can exist (e.g. registered from outside
    // the enum) - confirm against the callers that populate the factory map.
    String enumName = name.toUpperCase(Locale.ROOT);
    for (PreBuiltAnalyzers candidate : PreBuiltAnalyzers.values()) {
        if (candidate.name().equals(enumName)) {
            Analyzer analyzer = candidate.getAnalyzer(indexVersion);
            return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
        }
    }

    // No versioned variant is known for this name; use the pre-constructed provider.
    return analyzerProvider;
}

View File

@ -22,66 +22,34 @@ package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.commongrams.*;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.*;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.*;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.Version;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
@ -91,6 +59,7 @@ import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.analysis.*;
import java.io.Reader;
import java.util.Locale;
import java.util.Map;
import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS;
@ -101,7 +70,6 @@ import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_
public class IndicesAnalysisService extends AbstractComponent {
private final Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = ConcurrentCollections.newConcurrentMap();
private final Map<String, PreBuiltTokenizerFactoryFactory> tokenizerFactories = ConcurrentCollections.newConcurrentMap();
private final Map<String, PreBuiltTokenFilterFactoryFactory> tokenFilterFactories = ConcurrentCollections.newConcurrentMap();
private final Map<String, PreBuiltCharFilterFactoryFactory> charFilterFactories = ConcurrentCollections.newConcurrentMap();
@ -114,52 +82,10 @@ public class IndicesAnalysisService extends AbstractComponent {
public IndicesAnalysisService(Settings settings) {
super(settings);
StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Lucene.ANALYZER_VERSION);
analyzerProviderFactories.put("default", new PreBuiltAnalyzerProviderFactory("default", AnalyzerScope.INDICES, standardAnalyzer));
analyzerProviderFactories.put("standard", new PreBuiltAnalyzerProviderFactory("standard", AnalyzerScope.INDICES, standardAnalyzer));
analyzerProviderFactories.put("keyword", new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDICES, new KeywordAnalyzer()));
analyzerProviderFactories.put("stop", new PreBuiltAnalyzerProviderFactory("stop", AnalyzerScope.INDICES, new StopAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("whitespace", new PreBuiltAnalyzerProviderFactory("whitespace", AnalyzerScope.INDICES, new WhitespaceAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("simple", new PreBuiltAnalyzerProviderFactory("simple", AnalyzerScope.INDICES, new SimpleAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("classic", new PreBuiltAnalyzerProviderFactory("classic", AnalyzerScope.INDICES, new ClassicAnalyzer(Lucene.ANALYZER_VERSION)));
// extended ones
analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("basque", new PreBuiltAnalyzerProviderFactory("basque", AnalyzerScope.INDICES, new BasqueAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("bulgarian", new PreBuiltAnalyzerProviderFactory("bulgarian", AnalyzerScope.INDICES, new BulgarianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("catalan", new PreBuiltAnalyzerProviderFactory("catalan", AnalyzerScope.INDICES, new CatalanAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer()));
analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new CJKAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("danish", new PreBuiltAnalyzerProviderFactory("danish", AnalyzerScope.INDICES, new DanishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("english", new PreBuiltAnalyzerProviderFactory("english", AnalyzerScope.INDICES, new EnglishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("finnish", new PreBuiltAnalyzerProviderFactory("finnish", AnalyzerScope.INDICES, new FinnishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("galician", new PreBuiltAnalyzerProviderFactory("galician", AnalyzerScope.INDICES, new GalicianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("german", new PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("irish", new PreBuiltAnalyzerProviderFactory("irish", AnalyzerScope.INDICES, new IrishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("latvian", new PreBuiltAnalyzerProviderFactory("latvian", AnalyzerScope.INDICES, new LatvianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("portuguese", new PreBuiltAnalyzerProviderFactory("portuguese", AnalyzerScope.INDICES, new PortugueseAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("romanian", new PreBuiltAnalyzerProviderFactory("romanian", AnalyzerScope.INDICES, new RomanianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("spanish", new PreBuiltAnalyzerProviderFactory("spanish", AnalyzerScope.INDICES, new SpanishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("swedish", new PreBuiltAnalyzerProviderFactory("swedish", AnalyzerScope.INDICES, new SwedishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("turkish", new PreBuiltAnalyzerProviderFactory("turkish", AnalyzerScope.INDICES, new TurkishAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION)));
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
}
// Base Tokenizers
tokenizerFactories.put("standard", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@ -771,7 +697,7 @@ public class IndicesAnalysisService extends AbstractComponent {
}
public boolean hasAnalyzer(String name) {
return analyzer(name) != null;
return analyzerProviderFactories.containsKey(name);
}
public Analyzer analyzer(String name) {

View File

@ -0,0 +1,439 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;
import java.util.Map;
/**
*
*/
public enum PreBuiltAnalyzers {
STANDARD() {
@Override
protected Analyzer create(Version version) {
return new StandardAnalyzer(version.luceneVersion);
}
},
DEFAULT {
@Override
protected Analyzer create(Version version) {
// by calling get analyzer we are ensuring reuse of the same STANDARD analyzer for DEFAULT!
// this call does not create a new instance
return STANDARD.getAnalyzer(version);
}
},
KEYWORD(CachingStrategy.ONE) {
@Override
protected Analyzer create(Version version) {
return new KeywordAnalyzer();
}
},
STOP {
@Override
protected Analyzer create(Version version) {
return new StopAnalyzer(version.luceneVersion);
}
},
WHITESPACE {
@Override
protected Analyzer create(Version version) {
return new WhitespaceAnalyzer(version.luceneVersion);
}
},
SIMPLE {
@Override
protected Analyzer create(Version version) {
return new SimpleAnalyzer(version.luceneVersion);
}
},
CLASSIC {
@Override
protected Analyzer create(Version version) {
return new ClassicAnalyzer(version.luceneVersion);
}
},
SNOWBALL {
@Override
protected Analyzer create(Version version) {
return new SnowballAnalyzer(version.luceneVersion, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
},
PATTERN {
@Override
protected Analyzer create(Version version) {
return new PatternAnalyzer(version.luceneVersion, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
},
STANDARD_HTML_STRIP {
@Override
protected Analyzer create(Version version) {
return new StandardHtmlStripAnalyzer(version.luceneVersion);
}
},
ARABIC {
@Override
protected Analyzer create(Version version) {
return new ArabicAnalyzer(version.luceneVersion);
}
},
ARMENIAN {
@Override
protected Analyzer create(Version version) {
return new ArmenianAnalyzer(version.luceneVersion);
}
},
BASQUE {
@Override
protected Analyzer create(Version version) {
return new BasqueAnalyzer(version.luceneVersion);
}
},
BRAZILIAN {
@Override
protected Analyzer create(Version version) {
return new BrazilianAnalyzer(version.luceneVersion);
}
},
BULGARIAN {
@Override
protected Analyzer create(Version version) {
return new BulgarianAnalyzer(version.luceneVersion);
}
},
CATALAN {
@Override
protected Analyzer create(Version version) {
return new CatalanAnalyzer(version.luceneVersion);
}
},
CHINESE(CachingStrategy.ONE) {
@Override
protected Analyzer create(Version version) {
return new ChineseAnalyzer();
}
},
CJK {
@Override
protected Analyzer create(Version version) {
return new CJKAnalyzer(version.luceneVersion);
}
},
CZECH {
@Override
protected Analyzer create(Version version) {
return new CzechAnalyzer(version.luceneVersion);
}
},
DUTCH {
@Override
protected Analyzer create(Version version) {
return new DutchAnalyzer(version.luceneVersion);
}
},
DANISH {
@Override
protected Analyzer create(Version version) {
return new DanishAnalyzer(version.luceneVersion);
}
},
ENGLISH {
@Override
protected Analyzer create(Version version) {
return new EnglishAnalyzer(version.luceneVersion);
}
},
FINNISH {
@Override
protected Analyzer create(Version version) {
return new FinnishAnalyzer(version.luceneVersion);
}
},
FRENCH {
@Override
protected Analyzer create(Version version) {
return new FrenchAnalyzer(version.luceneVersion);
}
},
GALICIAN {
@Override
protected Analyzer create(Version version) {
return new GalicianAnalyzer(version.luceneVersion);
}
},
GERMAN {
@Override
protected Analyzer create(Version version) {
return new GermanAnalyzer(version.luceneVersion);
}
},
GREEK {
@Override
protected Analyzer create(Version version) {
return new GreekAnalyzer(version.luceneVersion);
}
},
HINDI {
@Override
protected Analyzer create(Version version) {
return new HindiAnalyzer(version.luceneVersion);
}
},
HUNGARIAN {
@Override
protected Analyzer create(Version version) {
return new HungarianAnalyzer(version.luceneVersion);
}
},
INDONESIAN {
@Override
protected Analyzer create(Version version) {
return new IndonesianAnalyzer(version.luceneVersion);
}
},
IRISH {
@Override
protected Analyzer create(Version version) {
return new IrishAnalyzer(version.luceneVersion);
}
},
ITALIAN {
@Override
protected Analyzer create(Version version) {
return new ItalianAnalyzer(version.luceneVersion);
}
},
LATVIAN {
@Override
protected Analyzer create(Version version) {
return new LatvianAnalyzer(version.luceneVersion);
}
},
NORWEGIAN {
@Override
protected Analyzer create(Version version) {
return new NorwegianAnalyzer(version.luceneVersion);
}
},
PERSIAN {
@Override
protected Analyzer create(Version version) {
return new PersianAnalyzer(version.luceneVersion);
}
},
PORTUGUESE {
@Override
protected Analyzer create(Version version) {
return new PortugueseAnalyzer(version.luceneVersion);
}
},
ROMANIAN {
@Override
protected Analyzer create(Version version) {
return new RomanianAnalyzer(version.luceneVersion);
}
},
RUSSIAN {
@Override
protected Analyzer create(Version version) {
return new RussianAnalyzer(version.luceneVersion);
}
},
SPANISH {
@Override
protected Analyzer create(Version version) {
return new SpanishAnalyzer(version.luceneVersion);
}
},
SWEDISH {
@Override
protected Analyzer create(Version version) {
return new SwedishAnalyzer(version.luceneVersion);
}
},
TURKISH {
@Override
protected Analyzer create(Version version) {
return new TurkishAnalyzer(version.luceneVersion);
}
},
THAI {
@Override
protected Analyzer create(Version version) {
return new ThaiAnalyzer(version.luceneVersion);
}
};
/**
* The strategy of caching the analyzer
*
* ONE Exactly one version is stored. Useful for analyzers which do not store version information
* LUCENE Exactly one version for each lucene version is stored. Useful to prevent different analyzers with the same version
* ELASTICSEARCH Exactly one version per elasticsearch version is stored. Useful if you change an analyzer between elasticsearch releases, when the lucene version does not change
*/
private static enum CachingStrategy { ONE, LUCENE, ELASTICSEARCH };
private CachingStrategy cachingStrategy;
protected final Map<Version, Analyzer> cachedAnalyzers = Maps.newHashMapWithExpectedSize(2);
PreBuiltAnalyzers() {
this(CachingStrategy.LUCENE);
}
PreBuiltAnalyzers(CachingStrategy cachingStrategy) {
this.cachingStrategy = cachingStrategy;
}
abstract protected Analyzer create(Version version);
/**
 * Returns an immutable copy of the analyzers cached so far for this constant,
 * keyed by the version they were requested with.
 *
 * The method is synchronized because the backing map is a plain hash map that
 * {@link #getAnalyzer(Version)} mutates while holding this constant's monitor;
 * copying it without the same lock could observe the map mid-update.
 *
 * @return a snapshot of the cache; later cache changes are not reflected in it
 */
public synchronized Map<Version, Analyzer> getCachedAnalyzers() {
    return ImmutableMap.copyOf(cachedAnalyzers);
}
/**
 * Returns the analyzer for the given version, reusing a cached instance when the
 * caching strategy finds one and creating a new instance otherwise.
 *
 * The returned analyzer is also recorded under the requested version if that
 * version has no cache entry yet.
 *
 * @param version the version to get the analyzer for
 * @return the cached or newly created analyzer
 */
public synchronized Analyzer getAnalyzer(Version version) {
    final Analyzer reusable = getCachedAnalyzer(version);
    final Analyzer result = (reusable != null) ? reusable : this.create(version);

    // register the instance under this exact version unless an entry already exists
    final boolean alreadyRegistered = cachedAnalyzers.containsKey(version);
    if (!alreadyRegistered) {
        cachedAnalyzers.put(version, result);
    }
    return result;
}
/**
 * Looks up an already cached analyzer that may be reused for the given version,
 * according to this constant's caching strategy.
 *
 * @param version the version an analyzer is requested for
 * @return a reusable cached analyzer, or null if the cache holds no match
 */
private Analyzer getCachedAnalyzer(Version version) {
    switch (this.cachingStrategy) {
        case ONE:
            // any cached instance will do: hand back the first one the map yields
            if (cachedAnalyzers.isEmpty()) {
                return null;
            }
            return cachedAnalyzers.values().iterator().next();
        case LUCENE:
            // reuse an instance cached under any version with the same lucene version
            for (Map.Entry<Version, Analyzer> cachedEntry : cachedAnalyzers.entrySet()) {
                if (cachedEntry.getKey().luceneVersion.equals(version.luceneVersion)) {
                    return cachedEntry.getValue();
                }
            }
            return null;
        case ELASTICSEARCH:
            // only an entry for exactly this version counts; get() yields null otherwise
            return cachedAnalyzers.get(version);
        default:
            throw new ElasticSearchException("No action configured for caching strategy[" + this.cachingStrategy + "]");
    }
}
}

View File

@ -24,11 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.lucene.Lucene;
@ -45,21 +47,35 @@ import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.hamcrest.MatcherAssert;
import org.junit.Ignore;
import org.junit.Test;
import java.io.*;
import java.lang.reflect.Field;
import java.util.Set;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.*;
/**
*
*/
public class AnalysisModuleTests extends ElasticsearchTestCase {
private Injector injector;
/**
 * Builds the injector hierarchy for an index named "test" from the given
 * settings and returns the resulting AnalysisService. The child injector is
 * kept in the {@code injector} field so tests can fetch further instances.
 */
public AnalysisService getAnalysisService(Settings settings) {
    final Index testIndex = new Index("test");

    // parent injector: settings, environment and the indices analysis module
    final Injector parent = new ModulesBuilder()
            .add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule())
            .createInjector();
    final IndicesAnalysisService indicesAnalysisService = parent.getInstance(IndicesAnalysisService.class);

    // child injector: index-scoped modules on top of the parent
    final ModulesBuilder indexModules = new ModulesBuilder().add(
            new IndexSettingsModule(testIndex, settings),
            new IndexNameModule(testIndex),
            new AnalysisModule(settings, indicesAnalysisService));
    injector = indexModules.createChildInjector(parent);

    return injector.getInstance(AnalysisService.class);
}
@Test
public void testSimpleConfigurationJson() {
Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/test1.json").build();
@ -79,6 +95,38 @@ public class AnalysisModuleTests extends ElasticsearchTestCase {
assertTokenFilter("arabic_normalization", ArabicNormalizationFilter.class);
}
/**
 * Checks that an index whose settings carry a creation version of 0.90.0 gets
 * prebuilt analyzers built for that version's lucene version, while the
 * IndicesAnalysisService keeps serving analyzers for the current version.
 */
@Test
public void testVersionedAnalyzers() throws Exception {
    final Settings versionedSettings = settingsBuilder()
            .loadFromClasspath("org/elasticsearch/index/analysis/test1.yml")
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_0_90_0)
            .build();
    final AnalysisService indexAnalysis = getAnalysisService(versionedSettings);

    // the IndicesAnalysisService is expected to stay on the current version
    final IndicesAnalysisService indicesAnalysis = injector.getInstance(IndicesAnalysisService.class);
    assertThat(indicesAnalysis.analyzer("default"), is(instanceOf(NamedAnalyzer.class)));
    final NamedAnalyzer currentDefault = (NamedAnalyzer) indicesAnalysis.analyzer("default");
    assertThat(currentDefault.analyzer(), is(instanceOf(StandardAnalyzer.class)));
    assertLuceneAnalyzerVersion(Version.CURRENT.luceneVersion, currentDefault.analyzer());

    // the index-level AnalysisService is expected to follow the index creation version
    final org.apache.lucene.util.Version expectedLucene = Version.V_0_90_0.luceneVersion;
    assertThat(indexAnalysis.analyzer("standard").analyzer(), is(instanceOf(StandardAnalyzer.class)));
    assertLuceneAnalyzerVersion(expectedLucene, indexAnalysis.analyzer("standard").analyzer());
    assertLuceneAnalyzerVersion(expectedLucene, indexAnalysis.analyzer("thai").analyzer());
}
/**
 * Asserts that the given analyzer was built with the expected lucene version.
 *
 * This is a reflection hack: it reads the field named "matchVersion" declared on
 * the analyzer's direct superclass, so it only works for analyzers whose direct
 * superclass declares that field.
 */
private void assertLuceneAnalyzerVersion(org.apache.lucene.util.Version luceneVersion, Analyzer analyzer) throws Exception {
    final Class<?> parentClass = analyzer.getClass().getSuperclass();
    final Field matchVersionField = parentClass.getDeclaredField("matchVersion");

    // open the field just long enough to read it, then restore the previous flag
    final boolean wasAccessible = matchVersionField.isAccessible();
    matchVersionField.setAccessible(true);
    final Object rawValue = matchVersionField.get(analyzer);
    matchVersionField.setAccessible(wasAccessible);

    assertThat(rawValue, instanceOf(org.apache.lucene.util.Version.class));
    final org.apache.lucene.util.Version actualVersion = (org.apache.lucene.util.Version) rawValue;
    assertThat(actualVersion, is(luceneVersion));
}
private void assertTokenFilter(String name, Class clazz) {
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(ImmutableSettings.settingsBuilder().build());
TokenFilterFactory tokenFilter = analysisService.tokenFilter(name);
@ -88,16 +136,7 @@ public class AnalysisModuleTests extends ElasticsearchTestCase {
}
private void testSimpleConfiguration(Settings settings) {
Index index = new Index("test");
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
Injector injector = new ModulesBuilder().add(
new IndexSettingsModule(index, settings),
new IndexNameModule(index),
new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
.createChildInjector(parentInjector);
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
AnalysisService analysisService = getAnalysisService(settings);
Analyzer analyzer = analysisService.analyzer("custom1").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));

View File

@ -0,0 +1,160 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.test.AbstractIntegrationTest;
import org.junit.Test;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.*;
/**
 * Integration test that creates indices with random creation versions and random pre-built
 * analyzers, indexes some documents, closes a random number of those indices and then checks
 * that the analyzers cached by {@link PreBuiltAnalyzers} have not been closed.
 */
public class PreBuiltAnalyzerIntegrationTests extends AbstractIntegrationTest {

    @Test
    public void testThatPreBuiltAnalyzersAreNotClosedOnIndexClose() throws Exception {
        // analyzer -> versions it has been requested with, used to verify the cache afterwards
        Map<PreBuiltAnalyzers, List<Version>> loadedAnalyzers = Maps.newHashMap();
        List<String> indexNames = Lists.newArrayList();
        for (int i = 0; i < 10; i++) {
            String indexName = randomAsciiOfLength(10).toLowerCase(Locale.ROOT);
            indexNames.add(indexName);

            int randomInt = randomInt(PreBuiltAnalyzers.values().length - 1);
            PreBuiltAnalyzers preBuiltAnalyzer = PreBuiltAnalyzers.values()[randomInt];
            String name = preBuiltAnalyzer.name().toLowerCase(Locale.ROOT);

            Version randomVersion = randomVersion();
            if (!loadedAnalyzers.containsKey(preBuiltAnalyzer)) {
                loadedAnalyzers.put(preBuiltAnalyzer, Lists.<Version>newArrayList());
            }
            loadedAnalyzers.get(preBuiltAnalyzer).add(randomVersion);

            final XContentBuilder mapping = jsonBuilder().startObject()
                    .startObject("type")
                        .startObject("properties")
                            .startObject("foo")
                                .field("type", "string")
                                .field("analyzer", name)
                            .endObject()
                        .endObject()
                    .endObject()
                    .endObject();

            Settings versionSettings = randomSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion).build();
            client().admin().indices().prepareCreate(indexName).addMapping("type", mapping).setSettings(versionSettings).get();
        }

        ensureGreen();

        // index some amount of data
        for (int i = 0; i < 100; i++) {
            String randomIndex = indexNames.get(randomInt(indexNames.size() - 1));
            String randomId = String.valueOf(randomInt());

            Map<String, Object> data = Maps.newHashMap();
            data.put("foo", randomAsciiOfLength(50));

            index(randomIndex, "type", randomId, data);
        }

        refresh();

        // close some of the indices; never all of them, the upper bound is one below the index count
        int amountOfIndicesToClose = randomInt(indexNames.size() - 1);
        for (int i = 0; i < amountOfIndicesToClose; i++) {
            String indexName = indexNames.get(i);
            client().admin().indices().prepareClose(indexName).execute().actionGet();
        }

        ensureGreen();

        // check that all above configured analyzers have been loaded
        assertThatAnalyzersHaveBeenLoaded(loadedAnalyzers);

        // check that all of the prebuiltanalyzers are still open
        for (PreBuiltAnalyzers preBuiltAnalyzer : PreBuiltAnalyzers.values()) {
            assertLuceneAnalyzerIsNotClosed(preBuiltAnalyzer);
        }
    }

    /**
     * Asserts that, for every analyzer in the given map, the analyzer's cache contains an entry
     * for each of the versions listed for it.
     *
     * @param expectedLoadedAnalyzers analyzers mapped to the versions they were requested with
     */
    private void assertThatAnalyzersHaveBeenLoaded(Map<PreBuiltAnalyzers, List<Version>> expectedLoadedAnalyzers) {
        for (Map.Entry<PreBuiltAnalyzers, List<Version>> entry : expectedLoadedAnalyzers.entrySet()) {
            Map<Version, Analyzer> cachedAnalyzers = entry.getKey().getCachedAnalyzers();
            assertThat(cachedAnalyzers.keySet(), hasItems(entry.getValue().toArray(new Version[]{})));
        }
    }

    /**
     * Asserts that none of the cached instances of the given pre-built analyzer has been closed.
     * <p>
     * The close() method of a lucene analyzer sets the storedValue field to null,
     * we simply check this via reflection - ugly but works.
     *
     * @param preBuiltAnalyzer the pre-built analyzer whose cached instances are checked
     */
    private void assertLuceneAnalyzerIsNotClosed(PreBuiltAnalyzers preBuiltAnalyzer) throws IllegalAccessException, NoSuchFieldException {
        for (Map.Entry<Version, Analyzer> luceneAnalyzerEntry : preBuiltAnalyzer.getCachedAnalyzers().entrySet()) {
            Field field = getFieldFromClass("storedValue", luceneAnalyzerEntry.getValue());
            boolean currentAccessible = field.isAccessible();
            field.setAccessible(true);
            Object storedValue;
            try {
                storedValue = field.get(preBuiltAnalyzer.getAnalyzer(luceneAnalyzerEntry.getKey()));
            } finally {
                // restore the flag even when the reflective read fails
                field.setAccessible(currentAccessible);
            }

            assertThat(String.format(Locale.ROOT, "Analyzer %s in version %s seems to be closed", preBuiltAnalyzer.name(), luceneAnalyzerEntry.getKey()), storedValue, is(notNullValue()));
        }
    }

    /**
     * Searches for a declared field with the given name, starting at the runtime class of the
     * given object and walking up through its superclasses. {@link Object} itself is not searched.
     *
     * @param fieldName name of the field to look up
     * @param obj       object whose class hierarchy is searched
     * @return the first matching field found on the way up the hierarchy
     * @throws RuntimeException if no class in the hierarchy declares such a field
     */
    private Field getFieldFromClass(String fieldName, Object obj) {
        for (Class<?> clazz = obj.getClass(); clazz != null && !Object.class.equals(clazz); clazz = clazz.getSuperclass()) {
            try {
                return clazz.getDeclaredField(fieldName);
            } catch (NoSuchFieldException ignored) {
                // not declared at this level, continue with the superclass
            }
        }
        throw new RuntimeException("Could not find " + fieldName + " field in class " + obj.getClass().getName());
    }

}

View File

@ -0,0 +1,48 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
/**
 * Checks that {@link PreBuiltAnalyzerProviderFactory} takes the index creation version from
 * the settings into account when it creates analyzer providers.
 */
public class PreBuiltAnalyzerProviderFactoryTests extends ElasticsearchTestCase {

    @Test
    public void testVersioningInFactoryProvider() throws Exception {
        PreBuiltAnalyzerProviderFactory providerFactory = new PreBuiltAnalyzerProviderFactory(
                "default", AnalyzerScope.INDEX, PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT));

        // no creation version in the settings at all
        AnalyzerProvider withoutVersion = providerFactory.create("default", ImmutableSettings.Builder.EMPTY_SETTINGS);
        AnalyzerProvider createdOn090 = createForVersion(providerFactory, Version.V_0_90_0);
        AnalyzerProvider createdOnCurrent = createForVersion(providerFactory, Version.CURRENT);

        // would love to access the version inside of the lucene analyzer, but that is not possible...
        assertThat(withoutVersion, is(createdOnCurrent));
        assertThat(withoutVersion, is(not(createdOn090)));
    }

    /**
     * Asks the factory for the "default" provider, using settings that contain nothing but the
     * given index creation version.
     */
    private AnalyzerProvider createForVersion(PreBuiltAnalyzerProviderFactory providerFactory, Version version) {
        return providerFactory.create("default",
                ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build());
    }

}

View File

@ -0,0 +1,97 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperTestUtils;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.Locale;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
/**
 * Unit tests for the instance caching of {@link PreBuiltAnalyzers} and for the use of the
 * cached instances by field mappers.
 */
public class PreBuiltAnalyzerTests extends ElasticsearchTestCase {

    @Test
    public void testThatDefaultAndStandardAnalyzerAreTheSameInstance() {
        Analyzer standard = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT);
        Analyzer defaultOne = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.CURRENT);

        // special case, these two are the same instance
        assertThat(defaultOne, is(standard));
    }

    @Test
    public void testThatInstancesAreTheSameAlwaysForKeywordAnalyzer() {
        Analyzer forCurrent = PreBuiltAnalyzers.KEYWORD.getAnalyzer(Version.CURRENT);
        Analyzer forOldVersion = PreBuiltAnalyzers.KEYWORD.getAnalyzer(Version.V_0_18_0);

        assertThat(forCurrent, is(forOldVersion));
    }

    @Test
    public void testThatInstancesAreCachedAndReused() {
        // asking twice for the same version has to yield equal analyzers
        Analyzer firstCurrent = PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT);
        Analyzer secondCurrent = PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT);
        assertThat(firstCurrent, is(secondCurrent));

        Analyzer firstOld = PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_0_18_0);
        Analyzer secondOld = PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_0_18_0);
        assertThat(firstOld, is(secondOld));
    }

    @Test
    public void testThatInstancesWithSameLuceneVersionAreReused() {
        // both are lucene 4.4 and should return the same instance
        Analyzer on0905 = PreBuiltAnalyzers.CATALAN.getAnalyzer(Version.V_0_90_5);
        Analyzer on0906 = PreBuiltAnalyzers.CATALAN.getAnalyzer(Version.V_0_90_6);

        assertThat(on0905, is(on0906));
    }

    @Test
    public void testThatAnalyzersAreUsedInMapping() throws IOException {
        // pick a random pre-built analyzer and a random index creation version
        int pick = randomInt(PreBuiltAnalyzers.values().length - 1);
        PreBuiltAnalyzers chosen = PreBuiltAnalyzers.values()[pick];
        String analyzerName = chosen.name().toLowerCase(Locale.ROOT);
        Version createdVersion = randomVersion();

        Settings indexSettings = ImmutableSettings.settingsBuilder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, createdVersion)
                .build();

        // the analyzer the mapper is expected to end up with
        PreBuiltAnalyzerProvider expectedProvider =
                new PreBuiltAnalyzerProvider(analyzerName, AnalyzerScope.INDEX, chosen.getAnalyzer(createdVersion));
        NamedAnalyzer expected = expectedProvider.get();

        String mapping = XContentFactory.jsonBuilder()
                .startObject()
                    .startObject("type")
                        .startObject("properties")
                            .startObject("field")
                                .field("type", "string")
                                .field("analyzer", analyzerName)
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
                .string();
        DocumentMapper docMapper = MapperTestUtils.newParser(indexSettings).parse(mapping);

        FieldMapper fieldMapper = docMapper.mappers().name("field").mapper();
        assertThat(fieldMapper.searchAnalyzer(), instanceOf(NamedAnalyzer.class));
        NamedAnalyzer actual = (NamedAnalyzer) fieldMapper.searchAnalyzer();

        assertThat(actual.analyzer(), is(expected.analyzer()));
    }
}

View File

@ -48,6 +48,11 @@ index :
custom5 :
tokenizer : standard
char_filter : [my_mapping]
custom6 :
type : standard
custom7 :
type : standard
version: 3.6
czechAnalyzerWithStemmer :
tokenizer : standard
filter : [standard, lowercase, stop, czech_stem]

View File

@ -48,7 +48,7 @@ public class MapperTestUtils {
}
public static DocumentMapperParser newParser(Settings indexSettings) {
return new DocumentMapperParser(new Index("test"), indexSettings, newAnalysisService(), new PostingsFormatService(new Index("test")),
return new DocumentMapperParser(new Index("test"), indexSettings, newAnalysisService(indexSettings), new PostingsFormatService(new Index("test")),
new DocValuesFormatService(new Index("test")), newSimilarityLookupService());
}
@ -58,11 +58,15 @@ public class MapperTestUtils {
}
public static AnalysisService newAnalysisService() {
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(ImmutableSettings.Builder.EMPTY_SETTINGS), new EnvironmentModule(new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector();
return newAnalysisService(ImmutableSettings.Builder.EMPTY_SETTINGS);
}
public static AnalysisService newAnalysisService(Settings indexSettings) {
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(indexSettings), new EnvironmentModule(new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector();
Injector injector = new ModulesBuilder().add(
new IndexSettingsModule(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS),
new IndexSettingsModule(new Index("test"), indexSettings),
new IndexNameModule(new Index("test")),
new AnalysisModule(ImmutableSettings.Builder.EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class))).createChildInjector(parentInjector);
new AnalysisModule(indexSettings, parentInjector.getInstance(IndicesAnalysisService.class))).createChildInjector(parentInjector);
return injector.getInstance(AnalysisService.class);
}