From 302c7a521adec8da71192bd9a82113537eac154c Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 21 Jul 2016 09:32:47 +0200 Subject: [PATCH] Fix analyzer alias processing (#19506) In the lack of tests the analyzer.alias feature was pretty much not working at all on current master. Issues like #19163 showed some serious problems for users using this feature upgrading to an alpha version. This change fixes the processing order and allows aliases to be set for existing analyzers like `default`. This change also ensures that if `default` is aliased the correct analyzer is used for `default_search` etc. Closes #19163 --- .../index/analysis/AnalysisService.java | 134 ++++++++++-------- .../index/mapper/FieldMapper.java | 4 +- .../mapper/core/TextFieldMapperTests.java | 42 ++++++ .../string/SimpleStringMappingTests.java | 43 ++++++ .../indices/analysis/AnalysisModuleTests.java | 49 +++++-- .../template/SimpleIndexTemplateIT.java | 3 + .../test/indices.put_template/10_basic.yaml | 47 ++++++ 7 files changed, 255 insertions(+), 67 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java index 0009fc95409..afe5c2ff250 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java @@ -28,8 +28,11 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.core.TextFieldMapper; import java.io.Closeable; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import static java.util.Collections.unmodifiableMap; @@ -58,69 +61,34 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories); analyzerProviders = new HashMap<>(analyzerProviders); - if (!analyzerProviders.containsKey("default")) { - analyzerProviders.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS)); - } - if (!analyzerProviders.containsKey("default_search")) { - analyzerProviders.put("default_search", analyzerProviders.get("default")); - } - if (!analyzerProviders.containsKey("default_search_quoted")) { - analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search")); - } - + Map analyzerAliases = new HashMap<>(); Map analyzers = new HashMap<>(); for (Map.Entry> entry : analyzerProviders.entrySet()) { - AnalyzerProvider analyzerFactory = entry.getValue(); - String name = entry.getKey(); - /* - * Lucene defaults positionIncrementGap to 0 in all analyzers but - * Elasticsearch defaults them to 0 only before version 2.0 - * and 100 afterwards so we override the positionIncrementGap if it - * doesn't match here. - */ - int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP; - if (analyzerFactory instanceof CustomAnalyzerProvider) { - ((CustomAnalyzerProvider) analyzerFactory).build(this); - /* - * Custom analyzers already default to the correct, version - * dependent positionIncrementGap and the user is be able to - * configure the positionIncrementGap directly on the analyzer so - * we disable overriding the positionIncrementGap to preserve the - * user's setting. - */ - overridePositionIncrementGap = Integer.MIN_VALUE; - } - Analyzer analyzerF = analyzerFactory.get(); - if (analyzerF == null) { - throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer"); - } - NamedAnalyzer analyzer; - if (analyzerF instanceof NamedAnalyzer) { - // if we got a named analyzer back, use it... - analyzer = (NamedAnalyzer) analyzerF; - if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { - // unless the positionIncrementGap needs to be overridden - analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); - } + processAnalyzerFactory(entry.getKey(), entry.getValue(), analyzerAliases, analyzers); + } + for (Map.Entry entry : analyzerAliases.entrySet()) { + String key = entry.getKey(); + if (analyzers.containsKey(key) && + ("default".equals(key) || "default_search".equals(key) || "default_search_quoted".equals(key)) == false) { + throw new IllegalStateException("already registered analyzer with name: " + key); } else { - analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); - } - if (analyzers.containsKey(name)) { - throw new IllegalStateException("already registered analyzer with name: " + name); - } - analyzers.put(name, analyzer); - String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); - if (strAliases != null) { - for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { - analyzers.put(alias, analyzer); - } - } - String[] aliases = this.indexSettings.getSettings().getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); - for (String alias : aliases) { - analyzers.put(alias, analyzer); + NamedAnalyzer configured = entry.getValue(); + analyzers.put(key, configured); } } + if (!analyzers.containsKey("default")) { + processAnalyzerFactory("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS), + analyzerAliases, analyzers); + } + if (!analyzers.containsKey("default_search")) { + analyzers.put("default_search", analyzers.get("default")); + } + if (!analyzers.containsKey("default_search_quoted")) { + analyzers.put("default_search_quoted", analyzers.get("default_search")); + } + + NamedAnalyzer defaultAnalyzer = analyzers.get("default"); if (defaultAnalyzer == null) { throw new IllegalArgumentException("no default analyzer configured"); @@ -145,6 +113,58 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable this.analyzers = unmodifiableMap(analyzers); } + private void processAnalyzerFactory(String name, AnalyzerProvider analyzerFactory, Map analyzerAliases, Map analyzers) { + /* + * Lucene defaults positionIncrementGap to 0 in all analyzers but + * Elasticsearch defaults them to 0 only before version 2.0 + * and 100 afterwards so we override the positionIncrementGap if it + * doesn't match here. + */ + int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP; + if (analyzerFactory instanceof CustomAnalyzerProvider) { + ((CustomAnalyzerProvider) analyzerFactory).build(this); + /* + * Custom analyzers already default to the correct, version + * dependent positionIncrementGap and the user is be able to + * configure the positionIncrementGap directly on the analyzer so + * we disable overriding the positionIncrementGap to preserve the + * user's setting. + */ + overridePositionIncrementGap = Integer.MIN_VALUE; + } + Analyzer analyzerF = analyzerFactory.get(); + if (analyzerF == null) { + throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer"); + } + NamedAnalyzer analyzer; + if (analyzerF instanceof NamedAnalyzer) { + // if we got a named analyzer back, use it... + analyzer = (NamedAnalyzer) analyzerF; + if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { + // unless the positionIncrementGap needs to be overridden + analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); + } + } else { + analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); + } + if (analyzers.containsKey(name)) { + throw new IllegalStateException("already registered analyzer with name: " + name); + } + analyzers.put(name, analyzer); + String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); + Set aliases = new HashSet<>(); + if (strAliases != null) { + aliases.addAll(Strings.commaDelimitedListToSet(strAliases)); + } + aliases.addAll(Arrays.asList(this.indexSettings.getSettings() + .getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias"))); + for (String alias : aliases) { + if (analyzerAliases.putIfAbsent(alias, analyzer) != null) { + throw new IllegalStateException("alias [" + alias + "] is already used by [" + analyzerAliases.get(alias).name() + "]"); + } + } + } + @Override public void close() { for (NamedAnalyzer analyzer : analyzers.values()) { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index c9f5e416f8a..d74d747d22b 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -435,9 +435,9 @@ public abstract class FieldMapper extends Mapper implements Cloneable { boolean hasDifferentSearchQuoteAnalyzer = fieldType().searchAnalyzer().name().equals(fieldType().searchQuoteAnalyzer().name()) == false; if (includeDefaults || hasDefaultIndexAnalyzer == false || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) { builder.field("analyzer", fieldType().indexAnalyzer().name()); - if (hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) { + if (includeDefaults || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) { builder.field("search_analyzer", fieldType().searchAnalyzer().name()); - if (hasDifferentSearchQuoteAnalyzer) { + if (includeDefaults || hasDifferentSearchQuoteAnalyzer) { builder.field("search_quote_analyzer", fieldType().searchQuoteAnalyzer().name()); } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/core/TextFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/core/TextFieldMapperTests.java index 6ef040233cc..002d06b7d60 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/core/TextFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/core/TextFieldMapperTests.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.IndexService; @@ -44,6 +45,7 @@ import org.elasticsearch.test.ESSingleNodeTestCase; import org.junit.Before; import java.io.IOException; +import java.util.Collections; import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -284,6 +286,46 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase { mapper = parser.parse("type", new CompressedXContent(mapping)); assertEquals(mapping, mapper.mappingSource().toString()); + + mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "text") + .field("analyzer", "keyword") + .endObject() + .endObject().endObject().endObject().string(); + + mapper = parser.parse("type", new CompressedXContent(mapping)); + assertEquals(mapping, mapper.mappingSource().toString()); + + // special case: default search analyzer + mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "text") + .field("analyzer", "keyword") + .field("search_analyzer", "default") + .endObject() + .endObject().endObject().endObject().string(); + + mapper = parser.parse("type", new CompressedXContent(mapping)); + assertEquals(mapping, mapper.mappingSource().toString()); + + mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "text") + .field("analyzer", "keyword") + .endObject() + .endObject().endObject().endObject().string(); + mapper = parser.parse("type", new CompressedXContent(mapping)); + XContentBuilder builder = XContentFactory.jsonBuilder(); + + mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true"))); + String mappingString = builder.string(); + assertTrue(mappingString.contains("analyzer")); + assertTrue(mappingString.contains("search_analyzer")); + assertTrue(mappingString.contains("search_quote_analyzer")); } public void testSearchQuoteAnalyzerSerialization() throws IOException { diff --git a/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java b/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java index 0dc1c4c5118..416774b1be4 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java @@ -53,6 +53,7 @@ import org.junit.Before; import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Map; import static java.util.Collections.emptyMap; @@ -301,6 +302,48 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase { mapper = parser.parse("type", new CompressedXContent(mapping)); assertEquals(mapping, mapper.mappingSource().toString()); + + // special case: default search analyzer + mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "string") + .field("analyzer", "keyword") + .endObject() + .endObject().endObject().endObject().string(); + + mapper = parser.parse("type", new CompressedXContent(mapping)); + assertEquals(mapping, mapper.mappingSource().toString()); + + // special case: default search analyzer + mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "string") + .field("analyzer", "keyword") + .field("search_analyzer", "default") + .endObject() + .endObject().endObject().endObject().string(); + + mapper = parser.parse("type", new CompressedXContent(mapping)); + assertEquals(mapping, mapper.mappingSource().toString()); + + + mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "string") + .field("analyzer", "keyword") + .endObject() + .endObject().endObject().endObject().string(); + mapper = parser.parse("type", new CompressedXContent(mapping)); + XContentBuilder builder = XContentFactory.jsonBuilder(); + + mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true"))); + String mappingString = builder.string(); + assertTrue(mappingString.contains("analyzer")); + assertTrue(mappingString.contains("search_analyzer")); + assertTrue(mappingString.contains("search_quote_analyzer")); } private Map getSerializedMap(String fieldName, DocumentMapper mapper) throws Exception { diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 7ca0df72f8f..e44d8a89cb7 100644 --- a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -25,6 +25,8 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.fa.PersianNormalizationFilter; import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter; @@ -52,6 +54,7 @@ import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; import org.hamcrest.MatcherAssert; import java.io.BufferedWriter; @@ -126,29 +129,59 @@ public class AnalysisModuleTests extends ModuleTestCase { Settings settings = Settings.builder() .put("index.analysis.analyzer.foobar.alias","default") .put("index.analysis.analyzer.foobar.type", "keyword") + .put("index.analysis.analyzer.foobar_search.alias","default_search") + .put("index.analysis.analyzer.foobar_search.type","english") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) .build(); AnalysisRegistry newRegistry = getNewRegistry(settings); AnalysisService as = getAnalysisService(newRegistry, settings); assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class))); - + assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class))); } - public void testDoubleAlias() throws IOException { + public void testAnalyzerAliasReferencesAlias() throws IOException { + Settings settings = Settings.builder() + .put("index.analysis.analyzer.foobar.alias","default") + .put("index.analysis.analyzer.foobar.type", "german") + .put("index.analysis.analyzer.foobar_search.alias","default_search") + .put("index.analysis.analyzer.foobar_search.type", "default") + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .build(); + AnalysisRegistry newRegistry = getNewRegistry(settings); + AnalysisService as = getAnalysisService(newRegistry, settings); + assertThat(as.analyzer("default").analyzer(), is(instanceOf(GermanAnalyzer.class))); + // analyzer types are bound early before we resolve aliases + assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(StandardAnalyzer.class))); + } + + public void testAnalyzerAliasDefault() throws IOException { Settings settings = Settings.builder() .put("index.analysis.analyzer.foobar.alias","default") .put("index.analysis.analyzer.foobar.type", "keyword") - .put("index.analysis.analyzer.barfoo.alias","default") - .put("index.analysis.analyzer.barfoo.type","english") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) .build(); AnalysisRegistry newRegistry = getNewRegistry(settings); - String message = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings)).getMessage(); - assertEquals("already registered analyzer with name: default", message); + AnalysisService as = getAnalysisService(newRegistry, settings); + assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class))); + assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class))); } + public void testAnalyzerAliasMoreThanOnce() throws IOException { + Settings settings = Settings.builder() + .put("index.analysis.analyzer.foobar.alias","default") + .put("index.analysis.analyzer.foobar.type", "keyword") + .put("index.analysis.analyzer.foobar1.alias","default") + .put("index.analysis.analyzer.foobar1.type", "english") + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + AnalysisRegistry newRegistry = getNewRegistry(settings); + IllegalStateException ise = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings)); + assertEquals("alias [default] is already used by [foobar]", ise.getMessage()); + } public void testVersionedAnalyzers() throws Exception { String yaml = "/org/elasticsearch/index/analysis/test1.yml"; Settings settings2 = Settings.builder() diff --git a/core/src/test/java/org/elasticsearch/indices/template/SimpleIndexTemplateIT.java b/core/src/test/java/org/elasticsearch/indices/template/SimpleIndexTemplateIT.java index a5ec8e4ecd7..676f26e7d74 100644 --- a/core/src/test/java/org/elasticsearch/indices/template/SimpleIndexTemplateIT.java +++ b/core/src/test/java/org/elasticsearch/indices/template/SimpleIndexTemplateIT.java @@ -32,12 +32,15 @@ import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.indices.IndexTemplateAlreadyExistsException; import org.elasticsearch.indices.InvalidAliasNameException; import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESIntegTestCase; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.put_template/10_basic.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.put_template/10_basic.yaml index 7d3e7103b10..eefd9ed98c6 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.put_template/10_basic.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.put_template/10_basic.yaml @@ -70,4 +70,51 @@ settings: number_of_shards: 1 number_of_replicas: 0 +--- +"Put template with analyzer alias": + + - do: + indices.put_template: + name: test + create: true + order: 0 + body: + template: test_* + settings: + index.analysis.analyzer.foobar.alias: "default" + index.analysis.analyzer.foobar.type: "keyword" + index.analysis.analyzer.foobar_search.alias: "default_search" + index.analysis.analyzer.foobar_search.type: "standard" + + - do: + index: + index: test_index + type: test + body: { field: "the quick brown fox" } + + - do: + indices.refresh: + index: test_index + + - do: + search: + index: test_index + type: test + body: + query: + term: + field: "the quick brown fox" + + - match: {hits.total: 1} + + - do: + search: + index: test_index + type: test + body: + query: + match: + field: "the quick brown fox" + + - match: {hits.total: 0}