Fix analyzer alias processing (#19506)

Due to a lack of tests, the analyzer.alias feature was pretty much not working
at all on current master. Issues like #19163 showed some serious problems for users
of this feature when upgrading to an alpha version.
This change fixes the processing order and allows aliases to be set for
existing analyzers like `default`. It also ensures that if `default`
is aliased, the correct analyzer is used for `default_search` etc.

Closes #19163
This commit is contained in:
Simon Willnauer 2016-07-21 09:32:47 +02:00 committed by GitHub
parent cebad703fe
commit 302c7a521a
7 changed files with 255 additions and 67 deletions

View File

@ -28,8 +28,11 @@ import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.core.TextFieldMapper;
import java.io.Closeable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static java.util.Collections.unmodifiableMap;
@ -58,20 +61,59 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories);
analyzerProviders = new HashMap<>(analyzerProviders);
if (!analyzerProviders.containsKey("default")) {
analyzerProviders.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("default_search")) {
analyzerProviders.put("default_search", analyzerProviders.get("default"));
}
if (!analyzerProviders.containsKey("default_search_quoted")) {
analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search"));
}
Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
AnalyzerProvider<?> analyzerFactory = entry.getValue();
String name = entry.getKey();
processAnalyzerFactory(entry.getKey(), entry.getValue(), analyzerAliases, analyzers);
}
for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
String key = entry.getKey();
if (analyzers.containsKey(key) &&
("default".equals(key) || "default_search".equals(key) || "default_search_quoted".equals(key)) == false) {
throw new IllegalStateException("already registered analyzer with name: " + key);
} else {
NamedAnalyzer configured = entry.getValue();
analyzers.put(key, configured);
}
}
if (!analyzers.containsKey("default")) {
processAnalyzerFactory("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS),
analyzerAliases, analyzers);
}
if (!analyzers.containsKey("default_search")) {
analyzers.put("default_search", analyzers.get("default"));
}
if (!analyzers.containsKey("default_search_quoted")) {
analyzers.put("default_search_quoted", analyzers.get("default_search"));
}
NamedAnalyzer defaultAnalyzer = analyzers.get("default");
if (defaultAnalyzer == null) {
throw new IllegalArgumentException("no default analyzer configured");
}
if (analyzers.containsKey("default_index")) {
final Version createdVersion = indexSettings.getIndexVersionCreated();
if (createdVersion.onOrAfter(Version.V_5_0_0_alpha1)) {
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + index().getName() + "]");
} else {
deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index().getName());
}
}
defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer;
defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer;
defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer;
for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) {
if (analyzer.getKey().startsWith("_")) {
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\"");
}
}
this.analyzers = unmodifiableMap(analyzers);
}
private void processAnalyzerFactory(String name, AnalyzerProvider<?> analyzerFactory, Map<String, NamedAnalyzer> analyzerAliases, Map<String, NamedAnalyzer> analyzers) {
/*
* Lucene defaults positionIncrementGap to 0 in all analyzers but
* Elasticsearch defaults them to 0 only before version 2.0
@ -110,39 +152,17 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
}
analyzers.put(name, analyzer);
String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
Set<String> aliases = new HashSet<>();
if (strAliases != null) {
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
analyzers.put(alias, analyzer);
aliases.addAll(Strings.commaDelimitedListToSet(strAliases));
}
}
String[] aliases = this.indexSettings.getSettings().getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
aliases.addAll(Arrays.asList(this.indexSettings.getSettings()
.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias")));
for (String alias : aliases) {
analyzers.put(alias, analyzer);
if (analyzerAliases.putIfAbsent(alias, analyzer) != null) {
throw new IllegalStateException("alias [" + alias + "] is already used by [" + analyzerAliases.get(alias).name() + "]");
}
}
NamedAnalyzer defaultAnalyzer = analyzers.get("default");
if (defaultAnalyzer == null) {
throw new IllegalArgumentException("no default analyzer configured");
}
if (analyzers.containsKey("default_index")) {
final Version createdVersion = indexSettings.getIndexVersionCreated();
if (createdVersion.onOrAfter(Version.V_5_0_0_alpha1)) {
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + index().getName() + "]");
} else {
deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index().getName());
}
}
defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer;
defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer;
defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer;
for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) {
if (analyzer.getKey().startsWith("_")) {
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\"");
}
}
this.analyzers = unmodifiableMap(analyzers);
}
@Override

View File

@ -435,9 +435,9 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
boolean hasDifferentSearchQuoteAnalyzer = fieldType().searchAnalyzer().name().equals(fieldType().searchQuoteAnalyzer().name()) == false;
if (includeDefaults || hasDefaultIndexAnalyzer == false || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
builder.field("analyzer", fieldType().indexAnalyzer().name());
if (hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
if (includeDefaults || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
builder.field("search_analyzer", fieldType().searchAnalyzer().name());
if (hasDifferentSearchQuoteAnalyzer) {
if (includeDefaults || hasDifferentSearchQuoteAnalyzer) {
builder.field("search_quote_analyzer", fieldType().searchQuoteAnalyzer().name());
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
@ -44,6 +45,7 @@ import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.Collections;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@ -284,6 +286,46 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "keyword")
.field("search_analyzer", "default")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
XContentBuilder builder = XContentFactory.jsonBuilder();
mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
String mappingString = builder.string();
assertTrue(mappingString.contains("analyzer"));
assertTrue(mappingString.contains("search_analyzer"));
assertTrue(mappingString.contains("search_quote_analyzer"));
}
public void testSearchQuoteAnalyzerSerialization() throws IOException {

View File

@ -53,6 +53,7 @@ import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import static java.util.Collections.emptyMap;
@ -301,6 +302,48 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "string")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "string")
.field("analyzer", "keyword")
.field("search_analyzer", "default")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "string")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
XContentBuilder builder = XContentFactory.jsonBuilder();
mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
String mappingString = builder.string();
assertTrue(mappingString.contains("analyzer"));
assertTrue(mappingString.contains("search_analyzer"));
assertTrue(mappingString.contains("search_quote_analyzer"));
}
private Map<String, Object> getSerializedMap(String fieldName, DocumentMapper mapper) throws Exception {

View File

@ -25,6 +25,8 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
@ -52,6 +54,7 @@ import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;
import org.hamcrest.MatcherAssert;
import java.io.BufferedWriter;
@ -126,29 +129,59 @@ public class AnalysisModuleTests extends ModuleTestCase {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "keyword")
.put("index.analysis.analyzer.foobar_search.alias","default_search")
.put("index.analysis.analyzer.foobar_search.type","english")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
AnalysisService as = getAnalysisService(newRegistry, settings);
assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class)));
}
public void testDoubleAlias() throws IOException {
public void testAnalyzerAliasReferencesAlias() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "german")
.put("index.analysis.analyzer.foobar_search.alias","default_search")
.put("index.analysis.analyzer.foobar_search.type", "default")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
AnalysisService as = getAnalysisService(newRegistry, settings);
assertThat(as.analyzer("default").analyzer(), is(instanceOf(GermanAnalyzer.class)));
// analyzer types are bound early before we resolve aliases
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(StandardAnalyzer.class)));
}
public void testAnalyzerAliasDefault() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "keyword")
.put("index.analysis.analyzer.barfoo.alias","default")
.put("index.analysis.analyzer.barfoo.type","english")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
String message = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings)).getMessage();
assertEquals("already registered analyzer with name: default", message);
AnalysisService as = getAnalysisService(newRegistry, settings);
assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
}
/**
 * Checks that creating the analysis service is rejected when two different
 * analyzers ({@code foobar} and {@code foobar1}) both declare the alias
 * {@code default}.
 */
public void testAnalyzerAliasMoreThanOnce() throws IOException {
// Two analyzers of different types that both claim the alias "default".
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "keyword")
.put("index.analysis.analyzer.foobar1.alias","default")
.put("index.analysis.analyzer.foobar1.type", "english")
// The index-created version is picked by VersionUtils.randomVersion rather than pinned.
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
// Building the service must throw, and the message must name the alias and the analyzer already holding it.
IllegalStateException ise = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings));
assertEquals("alias [default] is already used by [foobar]", ise.getMessage());
}
public void testVersionedAnalyzers() throws Exception {
String yaml = "/org/elasticsearch/index/analysis/test1.yml";
Settings settings2 = Settings.builder()

View File

@ -32,12 +32,15 @@ import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.indices.IndexTemplateAlreadyExistsException;
import org.elasticsearch.indices.InvalidAliasNameException;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESIntegTestCase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;

View File

@ -70,4 +70,51 @@
settings:
number_of_shards: 1
number_of_replicas: 0
---
"Put template with analyzer alias":
- do:
indices.put_template:
name: test
create: true
order: 0
body:
template: test_*
settings:
index.analysis.analyzer.foobar.alias: "default"
index.analysis.analyzer.foobar.type: "keyword"
index.analysis.analyzer.foobar_search.alias: "default_search"
index.analysis.analyzer.foobar_search.type: "standard"
- do:
index:
index: test_index
type: test
body: { field: "the quick brown fox" }
- do:
indices.refresh:
index: test_index
- do:
search:
index: test_index
type: test
body:
query:
term:
field: "the quick brown fox"
- match: {hits.total: 1}
- do:
search:
index: test_index
type: test
body:
query:
match:
field: "the quick brown fox"
- match: {hits.total: 0}