mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-24 13:55:57 +00:00
Fix analyzer alias processing (#19506)
In the lack of tests the analyzer.alias feature was pretty much not working at all on current master. Issues like #19163 showed some serious problems for users using this feature upgrading to an alpha version. This change fixes the processing order and allows aliases to be set for existing analyzers like `default`. This change also ensures that if `default` is aliased the correct analyzer is used for `default_search` etc. Closes #19163
This commit is contained in:
parent
cebad703fe
commit
302c7a521a
@ -28,8 +28,11 @@ import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.core.TextFieldMapper;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
@ -58,20 +61,59 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
||||
this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories);
|
||||
analyzerProviders = new HashMap<>(analyzerProviders);
|
||||
|
||||
if (!analyzerProviders.containsKey("default")) {
|
||||
analyzerProviders.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("default_search")) {
|
||||
analyzerProviders.put("default_search", analyzerProviders.get("default"));
|
||||
}
|
||||
if (!analyzerProviders.containsKey("default_search_quoted")) {
|
||||
analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search"));
|
||||
}
|
||||
|
||||
Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
|
||||
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
|
||||
for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
|
||||
AnalyzerProvider<?> analyzerFactory = entry.getValue();
|
||||
String name = entry.getKey();
|
||||
processAnalyzerFactory(entry.getKey(), entry.getValue(), analyzerAliases, analyzers);
|
||||
}
|
||||
for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
if (analyzers.containsKey(key) &&
|
||||
("default".equals(key) || "default_search".equals(key) || "default_search_quoted".equals(key)) == false) {
|
||||
throw new IllegalStateException("already registered analyzer with name: " + key);
|
||||
} else {
|
||||
NamedAnalyzer configured = entry.getValue();
|
||||
analyzers.put(key, configured);
|
||||
}
|
||||
}
|
||||
|
||||
if (!analyzers.containsKey("default")) {
|
||||
processAnalyzerFactory("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS),
|
||||
analyzerAliases, analyzers);
|
||||
}
|
||||
if (!analyzers.containsKey("default_search")) {
|
||||
analyzers.put("default_search", analyzers.get("default"));
|
||||
}
|
||||
if (!analyzers.containsKey("default_search_quoted")) {
|
||||
analyzers.put("default_search_quoted", analyzers.get("default_search"));
|
||||
}
|
||||
|
||||
|
||||
NamedAnalyzer defaultAnalyzer = analyzers.get("default");
|
||||
if (defaultAnalyzer == null) {
|
||||
throw new IllegalArgumentException("no default analyzer configured");
|
||||
}
|
||||
if (analyzers.containsKey("default_index")) {
|
||||
final Version createdVersion = indexSettings.getIndexVersionCreated();
|
||||
if (createdVersion.onOrAfter(Version.V_5_0_0_alpha1)) {
|
||||
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + index().getName() + "]");
|
||||
} else {
|
||||
deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index().getName());
|
||||
}
|
||||
}
|
||||
defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer;
|
||||
defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer;
|
||||
defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer;
|
||||
|
||||
for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) {
|
||||
if (analyzer.getKey().startsWith("_")) {
|
||||
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\"");
|
||||
}
|
||||
}
|
||||
this.analyzers = unmodifiableMap(analyzers);
|
||||
}
|
||||
|
||||
private void processAnalyzerFactory(String name, AnalyzerProvider<?> analyzerFactory, Map<String, NamedAnalyzer> analyzerAliases, Map<String, NamedAnalyzer> analyzers) {
|
||||
/*
|
||||
* Lucene defaults positionIncrementGap to 0 in all analyzers but
|
||||
* Elasticsearch defaults them to 0 only before version 2.0
|
||||
@ -110,39 +152,17 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
|
||||
}
|
||||
analyzers.put(name, analyzer);
|
||||
String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
|
||||
Set<String> aliases = new HashSet<>();
|
||||
if (strAliases != null) {
|
||||
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
|
||||
analyzers.put(alias, analyzer);
|
||||
aliases.addAll(Strings.commaDelimitedListToSet(strAliases));
|
||||
}
|
||||
}
|
||||
String[] aliases = this.indexSettings.getSettings().getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
|
||||
aliases.addAll(Arrays.asList(this.indexSettings.getSettings()
|
||||
.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias")));
|
||||
for (String alias : aliases) {
|
||||
analyzers.put(alias, analyzer);
|
||||
if (analyzerAliases.putIfAbsent(alias, analyzer) != null) {
|
||||
throw new IllegalStateException("alias [" + alias + "] is already used by [" + analyzerAliases.get(alias).name() + "]");
|
||||
}
|
||||
}
|
||||
|
||||
NamedAnalyzer defaultAnalyzer = analyzers.get("default");
|
||||
if (defaultAnalyzer == null) {
|
||||
throw new IllegalArgumentException("no default analyzer configured");
|
||||
}
|
||||
if (analyzers.containsKey("default_index")) {
|
||||
final Version createdVersion = indexSettings.getIndexVersionCreated();
|
||||
if (createdVersion.onOrAfter(Version.V_5_0_0_alpha1)) {
|
||||
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + index().getName() + "]");
|
||||
} else {
|
||||
deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index().getName());
|
||||
}
|
||||
}
|
||||
defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer;
|
||||
defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer;
|
||||
defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer;
|
||||
|
||||
for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) {
|
||||
if (analyzer.getKey().startsWith("_")) {
|
||||
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\"");
|
||||
}
|
||||
}
|
||||
this.analyzers = unmodifiableMap(analyzers);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -435,9 +435,9 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
|
||||
boolean hasDifferentSearchQuoteAnalyzer = fieldType().searchAnalyzer().name().equals(fieldType().searchQuoteAnalyzer().name()) == false;
|
||||
if (includeDefaults || hasDefaultIndexAnalyzer == false || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
|
||||
builder.field("analyzer", fieldType().indexAnalyzer().name());
|
||||
if (hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
|
||||
if (includeDefaults || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
|
||||
builder.field("search_analyzer", fieldType().searchAnalyzer().name());
|
||||
if (hasDifferentSearchQuoteAnalyzer) {
|
||||
if (includeDefaults || hasDifferentSearchQuoteAnalyzer) {
|
||||
builder.field("search_quote_analyzer", fieldType().searchQuoteAnalyzer().name());
|
||||
}
|
||||
}
|
||||
|
@ -29,6 +29,7 @@ import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
@ -44,6 +45,7 @@ import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
@ -284,6 +286,46 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field")
|
||||
.field("type", "text")
|
||||
.field("analyzer", "keyword")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject().string();
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
// special case: default search analyzer
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field")
|
||||
.field("type", "text")
|
||||
.field("analyzer", "keyword")
|
||||
.field("search_analyzer", "default")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject().string();
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field")
|
||||
.field("type", "text")
|
||||
.field("analyzer", "keyword")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject().string();
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
XContentBuilder builder = XContentFactory.jsonBuilder();
|
||||
|
||||
mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
|
||||
String mappingString = builder.string();
|
||||
assertTrue(mappingString.contains("analyzer"));
|
||||
assertTrue(mappingString.contains("search_analyzer"));
|
||||
assertTrue(mappingString.contains("search_quote_analyzer"));
|
||||
}
|
||||
|
||||
public void testSearchQuoteAnalyzerSerialization() throws IOException {
|
||||
|
@ -53,6 +53,7 @@ import org.junit.Before;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
import static java.util.Collections.emptyMap;
|
||||
@ -301,6 +302,48 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
// special case: default search analyzer
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field")
|
||||
.field("type", "string")
|
||||
.field("analyzer", "keyword")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject().string();
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
// special case: default search analyzer
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field")
|
||||
.field("type", "string")
|
||||
.field("analyzer", "keyword")
|
||||
.field("search_analyzer", "default")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject().string();
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field")
|
||||
.field("type", "string")
|
||||
.field("analyzer", "keyword")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject().string();
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
XContentBuilder builder = XContentFactory.jsonBuilder();
|
||||
|
||||
mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
|
||||
String mappingString = builder.string();
|
||||
assertTrue(mappingString.contains("analyzer"));
|
||||
assertTrue(mappingString.contains("search_analyzer"));
|
||||
assertTrue(mappingString.contains("search_quote_analyzer"));
|
||||
}
|
||||
|
||||
private Map<String, Object> getSerializedMap(String fieldName, DocumentMapper mapper) throws Exception {
|
||||
|
@ -25,6 +25,8 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
|
||||
@ -52,6 +54,7 @@ import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
|
||||
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
|
||||
import org.elasticsearch.plugins.AnalysisPlugin;
|
||||
import org.elasticsearch.test.IndexSettingsModule;
|
||||
import org.elasticsearch.test.VersionUtils;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
@ -126,29 +129,59 @@ public class AnalysisModuleTests extends ModuleTestCase {
|
||||
Settings settings = Settings.builder()
|
||||
.put("index.analysis.analyzer.foobar.alias","default")
|
||||
.put("index.analysis.analyzer.foobar.type", "keyword")
|
||||
.put("index.analysis.analyzer.foobar_search.alias","default_search")
|
||||
.put("index.analysis.analyzer.foobar_search.type","english")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
|
||||
.build();
|
||||
AnalysisRegistry newRegistry = getNewRegistry(settings);
|
||||
AnalysisService as = getAnalysisService(newRegistry, settings);
|
||||
assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
|
||||
|
||||
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class)));
|
||||
}
|
||||
|
||||
public void testDoubleAlias() throws IOException {
|
||||
public void testAnalyzerAliasReferencesAlias() throws IOException {
|
||||
Settings settings = Settings.builder()
|
||||
.put("index.analysis.analyzer.foobar.alias","default")
|
||||
.put("index.analysis.analyzer.foobar.type", "german")
|
||||
.put("index.analysis.analyzer.foobar_search.alias","default_search")
|
||||
.put("index.analysis.analyzer.foobar_search.type", "default")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
|
||||
.build();
|
||||
AnalysisRegistry newRegistry = getNewRegistry(settings);
|
||||
AnalysisService as = getAnalysisService(newRegistry, settings);
|
||||
assertThat(as.analyzer("default").analyzer(), is(instanceOf(GermanAnalyzer.class)));
|
||||
// analyzer types are bound early before we resolve aliases
|
||||
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(StandardAnalyzer.class)));
|
||||
}
|
||||
|
||||
public void testAnalyzerAliasDefault() throws IOException {
|
||||
Settings settings = Settings.builder()
|
||||
.put("index.analysis.analyzer.foobar.alias","default")
|
||||
.put("index.analysis.analyzer.foobar.type", "keyword")
|
||||
.put("index.analysis.analyzer.barfoo.alias","default")
|
||||
.put("index.analysis.analyzer.barfoo.type","english")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
|
||||
.build();
|
||||
AnalysisRegistry newRegistry = getNewRegistry(settings);
|
||||
String message = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings)).getMessage();
|
||||
assertEquals("already registered analyzer with name: default", message);
|
||||
AnalysisService as = getAnalysisService(newRegistry, settings);
|
||||
assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
|
||||
assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
|
||||
}
|
||||
|
||||
public void testAnalyzerAliasMoreThanOnce() throws IOException {
|
||||
Settings settings = Settings.builder()
|
||||
.put("index.analysis.analyzer.foobar.alias","default")
|
||||
.put("index.analysis.analyzer.foobar.type", "keyword")
|
||||
.put("index.analysis.analyzer.foobar1.alias","default")
|
||||
.put("index.analysis.analyzer.foobar1.type", "english")
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
AnalysisRegistry newRegistry = getNewRegistry(settings);
|
||||
IllegalStateException ise = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings));
|
||||
assertEquals("alias [default] is already used by [foobar]", ise.getMessage());
|
||||
}
|
||||
public void testVersionedAnalyzers() throws Exception {
|
||||
String yaml = "/org/elasticsearch/index/analysis/test1.yml";
|
||||
Settings settings2 = Settings.builder()
|
||||
|
@ -32,12 +32,15 @@ import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.index.query.TermQueryBuilder;
|
||||
import org.elasticsearch.indices.IndexTemplateAlreadyExistsException;
|
||||
import org.elasticsearch.indices.InvalidAliasNameException;
|
||||
import org.elasticsearch.search.SearchHit;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
|
@ -70,4 +70,51 @@
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
---
|
||||
"Put template with analyzer alias":
|
||||
|
||||
- do:
|
||||
indices.put_template:
|
||||
name: test
|
||||
create: true
|
||||
order: 0
|
||||
body:
|
||||
template: test_*
|
||||
settings:
|
||||
index.analysis.analyzer.foobar.alias: "default"
|
||||
index.analysis.analyzer.foobar.type: "keyword"
|
||||
index.analysis.analyzer.foobar_search.alias: "default_search"
|
||||
index.analysis.analyzer.foobar_search.type: "standard"
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test_index
|
||||
type: test
|
||||
body: { field: "the quick brown fox" }
|
||||
|
||||
- do:
|
||||
indices.refresh:
|
||||
index: test_index
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_index
|
||||
type: test
|
||||
body:
|
||||
query:
|
||||
term:
|
||||
field: "the quick brown fox"
|
||||
|
||||
- match: {hits.total: 1}
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_index
|
||||
type: test
|
||||
body:
|
||||
query:
|
||||
match:
|
||||
field: "the quick brown fox"
|
||||
|
||||
- match: {hits.total: 0}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user