Fix analyzer alias processing (#19506)

In the lack of tests the analyzer.alias feature was pretty much not working
at all on current master. Issues like #19163 showed some serious problems for users
using this feature upgrading to an alpha version.
This change fixes the processing order and allows aliases to be set for
existing analyzers like `default`. This change also ensures that if `default`
is aliased the correct analyzer is used for `default_search` etc.

Closes #19163
This commit is contained in:
Simon Willnauer 2016-07-21 09:32:47 +02:00 committed by GitHub
parent cebad703fe
commit 302c7a521a
7 changed files with 255 additions and 67 deletions

View File

@ -28,8 +28,11 @@ import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.core.TextFieldMapper;
import java.io.Closeable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static java.util.Collections.unmodifiableMap;
@ -58,69 +61,34 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories);
analyzerProviders = new HashMap<>(analyzerProviders);
if (!analyzerProviders.containsKey("default")) {
analyzerProviders.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("default_search")) {
analyzerProviders.put("default_search", analyzerProviders.get("default"));
}
if (!analyzerProviders.containsKey("default_search_quoted")) {
analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search"));
}
Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
AnalyzerProvider<?> analyzerFactory = entry.getValue();
String name = entry.getKey();
/*
* Lucene defaults positionIncrementGap to 0 in all analyzers but
* Elasticsearch defaults them to 0 only before version 2.0
* and 100 afterwards so we override the positionIncrementGap if it
* doesn't match here.
*/
int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
if (analyzerFactory instanceof CustomAnalyzerProvider) {
((CustomAnalyzerProvider) analyzerFactory).build(this);
/*
* Custom analyzers already default to the correct, version
* dependent positionIncrementGap and the user is be able to
* configure the positionIncrementGap directly on the analyzer so
* we disable overriding the positionIncrementGap to preserve the
* user's setting.
*/
overridePositionIncrementGap = Integer.MIN_VALUE;
}
Analyzer analyzerF = analyzerFactory.get();
if (analyzerF == null) {
throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
}
NamedAnalyzer analyzer;
if (analyzerF instanceof NamedAnalyzer) {
// if we got a named analyzer back, use it...
analyzer = (NamedAnalyzer) analyzerF;
if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
// unless the positionIncrementGap needs to be overridden
analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
}
processAnalyzerFactory(entry.getKey(), entry.getValue(), analyzerAliases, analyzers);
}
for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
String key = entry.getKey();
if (analyzers.containsKey(key) &&
("default".equals(key) || "default_search".equals(key) || "default_search_quoted".equals(key)) == false) {
throw new IllegalStateException("already registered analyzer with name: " + key);
} else {
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
}
if (analyzers.containsKey(name)) {
throw new IllegalStateException("already registered analyzer with name: " + name);
}
analyzers.put(name, analyzer);
String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
if (strAliases != null) {
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
analyzers.put(alias, analyzer);
}
}
String[] aliases = this.indexSettings.getSettings().getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
for (String alias : aliases) {
analyzers.put(alias, analyzer);
NamedAnalyzer configured = entry.getValue();
analyzers.put(key, configured);
}
}
if (!analyzers.containsKey("default")) {
processAnalyzerFactory("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS),
analyzerAliases, analyzers);
}
if (!analyzers.containsKey("default_search")) {
analyzers.put("default_search", analyzers.get("default"));
}
if (!analyzers.containsKey("default_search_quoted")) {
analyzers.put("default_search_quoted", analyzers.get("default_search"));
}
NamedAnalyzer defaultAnalyzer = analyzers.get("default");
if (defaultAnalyzer == null) {
throw new IllegalArgumentException("no default analyzer configured");
@ -145,6 +113,58 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
this.analyzers = unmodifiableMap(analyzers);
}
private void processAnalyzerFactory(String name, AnalyzerProvider<?> analyzerFactory, Map<String, NamedAnalyzer> analyzerAliases, Map<String, NamedAnalyzer> analyzers) {
/*
* Lucene defaults positionIncrementGap to 0 in all analyzers but
* Elasticsearch defaults them to 0 only before version 2.0
* and 100 afterwards so we override the positionIncrementGap if it
* doesn't match here.
*/
int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
if (analyzerFactory instanceof CustomAnalyzerProvider) {
((CustomAnalyzerProvider) analyzerFactory).build(this);
/*
* Custom analyzers already default to the correct, version
* dependent positionIncrementGap and the user is be able to
* configure the positionIncrementGap directly on the analyzer so
* we disable overriding the positionIncrementGap to preserve the
* user's setting.
*/
overridePositionIncrementGap = Integer.MIN_VALUE;
}
Analyzer analyzerF = analyzerFactory.get();
if (analyzerF == null) {
throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
}
NamedAnalyzer analyzer;
if (analyzerF instanceof NamedAnalyzer) {
// if we got a named analyzer back, use it...
analyzer = (NamedAnalyzer) analyzerF;
if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
// unless the positionIncrementGap needs to be overridden
analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
}
} else {
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
}
if (analyzers.containsKey(name)) {
throw new IllegalStateException("already registered analyzer with name: " + name);
}
analyzers.put(name, analyzer);
String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
Set<String> aliases = new HashSet<>();
if (strAliases != null) {
aliases.addAll(Strings.commaDelimitedListToSet(strAliases));
}
aliases.addAll(Arrays.asList(this.indexSettings.getSettings()
.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias")));
for (String alias : aliases) {
if (analyzerAliases.putIfAbsent(alias, analyzer) != null) {
throw new IllegalStateException("alias [" + alias + "] is already used by [" + analyzerAliases.get(alias).name() + "]");
}
}
}
@Override
public void close() {
for (NamedAnalyzer analyzer : analyzers.values()) {

View File

@ -435,9 +435,9 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
boolean hasDifferentSearchQuoteAnalyzer = fieldType().searchAnalyzer().name().equals(fieldType().searchQuoteAnalyzer().name()) == false;
if (includeDefaults || hasDefaultIndexAnalyzer == false || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
builder.field("analyzer", fieldType().indexAnalyzer().name());
if (hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
if (includeDefaults || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
builder.field("search_analyzer", fieldType().searchAnalyzer().name());
if (hasDifferentSearchQuoteAnalyzer) {
if (includeDefaults || hasDifferentSearchQuoteAnalyzer) {
builder.field("search_quote_analyzer", fieldType().searchQuoteAnalyzer().name());
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
@ -44,6 +45,7 @@ import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.Collections;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@ -284,6 +286,46 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "keyword")
.field("search_analyzer", "default")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
XContentBuilder builder = XContentFactory.jsonBuilder();
mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
String mappingString = builder.string();
assertTrue(mappingString.contains("analyzer"));
assertTrue(mappingString.contains("search_analyzer"));
assertTrue(mappingString.contains("search_quote_analyzer"));
}
public void testSearchQuoteAnalyzerSerialization() throws IOException {

View File

@ -53,6 +53,7 @@ import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import static java.util.Collections.emptyMap;
@ -301,6 +302,48 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "string")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "string")
.field("analyzer", "keyword")
.field("search_analyzer", "default")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "string")
.field("analyzer", "keyword")
.endObject()
.endObject().endObject().endObject().string();
mapper = parser.parse("type", new CompressedXContent(mapping));
XContentBuilder builder = XContentFactory.jsonBuilder();
mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
String mappingString = builder.string();
assertTrue(mappingString.contains("analyzer"));
assertTrue(mappingString.contains("search_analyzer"));
assertTrue(mappingString.contains("search_quote_analyzer"));
}
private Map<String, Object> getSerializedMap(String fieldName, DocumentMapper mapper) throws Exception {

View File

@ -25,6 +25,8 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
@ -52,6 +54,7 @@ import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;
import org.hamcrest.MatcherAssert;
import java.io.BufferedWriter;
@ -126,29 +129,59 @@ public class AnalysisModuleTests extends ModuleTestCase {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "keyword")
.put("index.analysis.analyzer.foobar_search.alias","default_search")
.put("index.analysis.analyzer.foobar_search.type","english")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
AnalysisService as = getAnalysisService(newRegistry, settings);
assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class)));
}
public void testDoubleAlias() throws IOException {
public void testAnalyzerAliasReferencesAlias() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "german")
.put("index.analysis.analyzer.foobar_search.alias","default_search")
.put("index.analysis.analyzer.foobar_search.type", "default")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
AnalysisService as = getAnalysisService(newRegistry, settings);
assertThat(as.analyzer("default").analyzer(), is(instanceOf(GermanAnalyzer.class)));
// analyzer types are bound early before we resolve aliases
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(StandardAnalyzer.class)));
}
public void testAnalyzerAliasDefault() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "keyword")
.put("index.analysis.analyzer.barfoo.alias","default")
.put("index.analysis.analyzer.barfoo.type","english")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
String message = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings)).getMessage();
assertEquals("already registered analyzer with name: default", message);
AnalysisService as = getAnalysisService(newRegistry, settings);
assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
}
public void testAnalyzerAliasMoreThanOnce() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.foobar.alias","default")
.put("index.analysis.analyzer.foobar.type", "keyword")
.put("index.analysis.analyzer.foobar1.alias","default")
.put("index.analysis.analyzer.foobar1.type", "english")
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
AnalysisRegistry newRegistry = getNewRegistry(settings);
IllegalStateException ise = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings));
assertEquals("alias [default] is already used by [foobar]", ise.getMessage());
}
public void testVersionedAnalyzers() throws Exception {
String yaml = "/org/elasticsearch/index/analysis/test1.yml";
Settings settings2 = Settings.builder()

View File

@ -32,12 +32,15 @@ import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.indices.IndexTemplateAlreadyExistsException;
import org.elasticsearch.indices.InvalidAliasNameException;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESIntegTestCase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;

View File

@ -70,4 +70,51 @@
settings:
number_of_shards: 1
number_of_replicas: 0
---
"Put template with analyzer alias":
- do:
indices.put_template:
name: test
create: true
order: 0
body:
template: test_*
settings:
index.analysis.analyzer.foobar.alias: "default"
index.analysis.analyzer.foobar.type: "keyword"
index.analysis.analyzer.foobar_search.alias: "default_search"
index.analysis.analyzer.foobar_search.type: "standard"
- do:
index:
index: test_index
type: test
body: { field: "the quick brown fox" }
- do:
indices.refresh:
index: test_index
- do:
search:
index: test_index
type: test
body:
query:
term:
field: "the quick brown fox"
- match: {hits.total: 1}
- do:
search:
index: test_index
type: test
body:
query:
match:
field: "the quick brown fox"
- match: {hits.total: 0}