Add the ability to set an analyzer on keyword fields. (#21919)
This adds a new `normalizer` property to `keyword` fields that pre-processes the field value prior to indexing, but without altering the `_source`. Note that only the normalization components that work on a per-character basis are applied; for instance, stemming filters will be ignored, while lowercasing or ASCII folding will be applied. Closes #18064
This commit is contained in:
parent
117b63ed41
commit
3f805d68cb
|
@ -145,11 +145,10 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<Entry<String, NamedAnalyzer>> entrySet() {
|
public Set<Entry<String, NamedAnalyzer>> entrySet() {
|
||||||
// just to ensure we can iterate over this single analzyer
|
return Collections.emptySet();
|
||||||
return Collections.singletonMap(fakeDefault.name(), fakeDefault).entrySet();
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap)) {
|
try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap)) {
|
||||||
MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalzyers, xContentRegistry, similarityService,
|
MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalzyers, xContentRegistry, similarityService,
|
||||||
mapperRegistry, () -> null);
|
mapperRegistry, () -> null);
|
||||||
mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY, false);
|
mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY, false);
|
||||||
|
|
|
@ -67,17 +67,20 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
private final Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters;
|
private final Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters;
|
||||||
private final Map<String, AnalysisProvider<TokenizerFactory>> tokenizers;
|
private final Map<String, AnalysisProvider<TokenizerFactory>> tokenizers;
|
||||||
private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers;
|
private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers;
|
||||||
|
private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers;
|
||||||
|
|
||||||
public AnalysisRegistry(Environment environment,
|
public AnalysisRegistry(Environment environment,
|
||||||
Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
|
Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
|
||||||
Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
|
Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
|
||||||
Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
|
Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
|
||||||
Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers) {
|
Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
|
||||||
|
Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers) {
|
||||||
this.environment = environment;
|
this.environment = environment;
|
||||||
this.charFilters = unmodifiableMap(charFilters);
|
this.charFilters = unmodifiableMap(charFilters);
|
||||||
this.tokenFilters = unmodifiableMap(tokenFilters);
|
this.tokenFilters = unmodifiableMap(tokenFilters);
|
||||||
this.tokenizers = unmodifiableMap(tokenizers);
|
this.tokenizers = unmodifiableMap(tokenizers);
|
||||||
this.analyzers = unmodifiableMap(analyzers);
|
this.analyzers = unmodifiableMap(analyzers);
|
||||||
|
this.normalizers = unmodifiableMap(normalizers);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -151,7 +154,8 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
final Map<String, TokenizerFactory> tokenizerFactories = buildTokenizerFactories(indexSettings);
|
final Map<String, TokenizerFactory> tokenizerFactories = buildTokenizerFactories(indexSettings);
|
||||||
final Map<String, TokenFilterFactory> tokenFilterFactories = buildTokenFilterFactories(indexSettings);
|
final Map<String, TokenFilterFactory> tokenFilterFactories = buildTokenFilterFactories(indexSettings);
|
||||||
final Map<String, AnalyzerProvider<?>> analyzierFactories = buildAnalyzerFactories(indexSettings);
|
final Map<String, AnalyzerProvider<?>> analyzierFactories = buildAnalyzerFactories(indexSettings);
|
||||||
return build(indexSettings, analyzierFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
|
final Map<String, AnalyzerProvider<?>> normalizerFactories = buildNormalizerFactories(indexSettings);
|
||||||
|
return build(indexSettings, analyzierFactories, normalizerFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
|
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
|
||||||
|
@ -164,22 +168,28 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
*/
|
*/
|
||||||
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
|
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
|
||||||
tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings)));
|
tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings)));
|
||||||
return buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
|
return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
|
public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
|
||||||
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
|
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
|
||||||
return buildMapping(false, "tokenizer", indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.tokenizerFactories);
|
return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.tokenizerFactories);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
|
public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
|
||||||
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
|
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
|
||||||
return buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
|
return buildMapping(Component.CHAR_FILTER, indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
|
public Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
|
||||||
final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
|
final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
|
||||||
return buildMapping(true, "analyzer", indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories);
|
return buildMapping(Component.ANALYZER, indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, AnalyzerProvider<?>> buildNormalizerFactories(IndexSettings indexSettings) throws IOException {
|
||||||
|
final Map<String, Settings> noralizersSettings = indexSettings.getSettings().getGroups("index.analysis.normalizer");
|
||||||
|
// TODO: Have pre-built normalizers
|
||||||
|
return buildMapping(Component.NORMALIZER, indexSettings, noralizersSettings, normalizers, Collections.emptyMap());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -194,7 +204,7 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
|
final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
|
||||||
if (tokenizerSettings.containsKey(tokenizer)) {
|
if (tokenizerSettings.containsKey(tokenizer)) {
|
||||||
Settings currentSettings = tokenizerSettings.get(tokenizer);
|
Settings currentSettings = tokenizerSettings.get(tokenizer);
|
||||||
return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
|
return getAnalysisProvider(Component.TOKENIZER, tokenizers, tokenizer, currentSettings.get("type"));
|
||||||
} else {
|
} else {
|
||||||
return getTokenizerProvider(tokenizer);
|
return getTokenizerProvider(tokenizer);
|
||||||
}
|
}
|
||||||
|
@ -223,7 +233,7 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
} else if ("synonym_graph".equals(typeName)) {
|
} else if ("synonym_graph".equals(typeName)) {
|
||||||
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings));
|
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings));
|
||||||
} else {
|
} else {
|
||||||
return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
|
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return getTokenFilterProvider(tokenFilter);
|
return getTokenFilterProvider(tokenFilter);
|
||||||
|
@ -242,7 +252,7 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
|
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
|
||||||
if (tokenFilterSettings.containsKey(charFilter)) {
|
if (tokenFilterSettings.containsKey(charFilter)) {
|
||||||
Settings currentSettings = tokenFilterSettings.get(charFilter);
|
Settings currentSettings = tokenFilterSettings.get(charFilter);
|
||||||
return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
|
return getAnalysisProvider(Component.CHAR_FILTER, charFilters, charFilter, currentSettings.get("type"));
|
||||||
} else {
|
} else {
|
||||||
return getCharFilterProvider(charFilter);
|
return getCharFilterProvider(charFilter);
|
||||||
}
|
}
|
||||||
|
@ -261,7 +271,40 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> Map<String, T> buildMapping(boolean analyzer, String toBuild, IndexSettings settings, Map<String, Settings> settingsMap,
|
/**
 * Identifies the kind of analysis component being built or looked up.
 * {@link #toString()} yields the lowercase label that is concatenated into
 * the error messages raised for that component.
 */
enum Component {
    ANALYZER("analyzer"),
    NORMALIZER("normalizer"),
    CHAR_FILTER("char_filter"),
    TOKENIZER("tokenizer"),
    FILTER("filter");

    /** Text returned by {@link #toString()}. */
    private final String label;

    Component(String label) {
        this.label = label;
    }

    @Override
    public String toString() {
        return label;
    }
}
|
||||||
|
|
||||||
|
private <T> Map<String, T> buildMapping(Component component, IndexSettings settings, Map<String, Settings> settingsMap,
|
||||||
Map<String, AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, AnalysisModule.AnalysisProvider<T>> defaultInstance)
|
Map<String, AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, AnalysisModule.AnalysisProvider<T>> defaultInstance)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, settings.getIndexVersionCreated()).build();
|
Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, settings.getIndexVersionCreated()).build();
|
||||||
|
@ -270,29 +313,34 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
String name = entry.getKey();
|
String name = entry.getKey();
|
||||||
Settings currentSettings = entry.getValue();
|
Settings currentSettings = entry.getValue();
|
||||||
String typeName = currentSettings.get("type");
|
String typeName = currentSettings.get("type");
|
||||||
if (analyzer) {
|
if (component == Component.ANALYZER) {
|
||||||
T factory;
|
T factory = null;
|
||||||
if (typeName == null) {
|
if (typeName == null) {
|
||||||
if (currentSettings.get("tokenizer") != null) {
|
if (currentSettings.get("tokenizer") != null) {
|
||||||
factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
|
factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
|
throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
|
||||||
}
|
}
|
||||||
} else if (typeName.equals("custom")) {
|
} else if (typeName.equals("custom")) {
|
||||||
factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
|
factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
|
||||||
} else {
|
|
||||||
AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
|
|
||||||
if (type == null) {
|
|
||||||
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
|
|
||||||
}
|
|
||||||
factory = type.get(settings, environment, name, currentSettings);
|
|
||||||
}
|
}
|
||||||
factories.put(name, factory);
|
if (factory != null) {
|
||||||
} else {
|
factories.put(name, factory);
|
||||||
AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
|
continue;
|
||||||
final T factory = type.get(settings, environment, name, currentSettings);
|
}
|
||||||
factories.put(name, factory);
|
} else if (component == Component.NORMALIZER) {
|
||||||
|
if (typeName == null || typeName.equals("custom")) {
|
||||||
|
T factory = (T) new CustomNormalizerProvider(settings, name, currentSettings);
|
||||||
|
factories.put(name, factory);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
AnalysisProvider<T> type = getAnalysisProvider(component, providerMap, name, typeName);
|
||||||
|
if (type == null) {
|
||||||
|
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
|
||||||
|
}
|
||||||
|
final T factory = type.get(settings, environment, name, currentSettings);
|
||||||
|
factories.put(name, factory);
|
||||||
|
|
||||||
}
|
}
|
||||||
// go over the char filters in the bindings and register the ones that are not configured
|
// go over the char filters in the bindings and register the ones that are not configured
|
||||||
|
@ -330,13 +378,13 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
return factories;
|
return factories;
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
|
private <T> AnalysisProvider<T> getAnalysisProvider(Component component, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
|
||||||
if (typeName == null) {
|
if (typeName == null) {
|
||||||
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
|
throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
|
||||||
}
|
}
|
||||||
AnalysisProvider<T> type = providerMap.get(typeName);
|
AnalysisProvider<T> type = providerMap.get(typeName);
|
||||||
if (type == null) {
|
if (type == null) {
|
||||||
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
|
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
|
||||||
}
|
}
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
@ -426,6 +474,7 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
|
|
||||||
public IndexAnalyzers build(IndexSettings indexSettings,
|
public IndexAnalyzers build(IndexSettings indexSettings,
|
||||||
Map<String, AnalyzerProvider<?>> analyzerProviders,
|
Map<String, AnalyzerProvider<?>> analyzerProviders,
|
||||||
|
Map<String, AnalyzerProvider<?>> normalizerProviders,
|
||||||
Map<String, TokenizerFactory> tokenizerFactoryFactories,
|
Map<String, TokenizerFactory> tokenizerFactoryFactories,
|
||||||
Map<String, CharFilterFactory> charFilterFactoryFactories,
|
Map<String, CharFilterFactory> charFilterFactoryFactories,
|
||||||
Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {
|
Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {
|
||||||
|
@ -436,10 +485,15 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
DeprecationLogger deprecationLogger = new DeprecationLogger(logger);
|
DeprecationLogger deprecationLogger = new DeprecationLogger(logger);
|
||||||
Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
|
Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
|
||||||
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
|
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
|
||||||
|
Map<String, NamedAnalyzer> normalizers = new HashMap<>();
|
||||||
for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
|
for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
|
||||||
processAnalyzerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), analyzerAliases, analyzers,
|
processAnalyzerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), analyzerAliases, analyzers,
|
||||||
tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories);
|
tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories);
|
||||||
}
|
}
|
||||||
|
for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
|
||||||
|
processNormalizerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), normalizers,
|
||||||
|
tokenFilterFactoryFactories, charFilterFactoryFactories);
|
||||||
|
}
|
||||||
for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
|
for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
|
||||||
String key = entry.getKey();
|
String key = entry.getKey();
|
||||||
if (analyzers.containsKey(key) &&
|
if (analyzers.containsKey(key) &&
|
||||||
|
@ -485,7 +539,7 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new IndexAnalyzers(indexSettings, defaultIndexAnalyzer, defaultSearchAnalyzer, defaultSearchQuoteAnalyzer,
|
return new IndexAnalyzers(indexSettings, defaultIndexAnalyzer, defaultSearchAnalyzer, defaultSearchQuoteAnalyzer,
|
||||||
unmodifiableMap(analyzers));
|
unmodifiableMap(analyzers), unmodifiableMap(normalizers));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processAnalyzerFactory(DeprecationLogger deprecationLogger,
|
private void processAnalyzerFactory(DeprecationLogger deprecationLogger,
|
||||||
|
@ -551,4 +605,25 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void processNormalizerFactory(DeprecationLogger deprecationLogger,
|
||||||
|
IndexSettings indexSettings,
|
||||||
|
String name,
|
||||||
|
AnalyzerProvider<?> normalizerFactory,
|
||||||
|
Map<String, NamedAnalyzer> normalizers,
|
||||||
|
Map<String, TokenFilterFactory> tokenFilters,
|
||||||
|
Map<String, CharFilterFactory> charFilters) {
|
||||||
|
if (normalizerFactory instanceof CustomNormalizerProvider) {
|
||||||
|
((CustomNormalizerProvider) normalizerFactory).build(charFilters, tokenFilters);
|
||||||
|
}
|
||||||
|
Analyzer normalizerF = normalizerFactory.get();
|
||||||
|
if (normalizerF == null) {
|
||||||
|
throw new IllegalArgumentException("normalizer [" + normalizerFactory.name() + "] created null normalizer");
|
||||||
|
}
|
||||||
|
NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
|
||||||
|
if (normalizers.containsKey(name)) {
|
||||||
|
throw new IllegalStateException("already registered analyzer with name: " + name);
|
||||||
|
}
|
||||||
|
normalizers.put(name, normalizer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,4 +94,27 @@ public final class CustomAnalyzer extends Analyzer {
|
||||||
}
|
}
|
||||||
return reader;
|
return reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
|
||||||
|
for (CharFilterFactory charFilter : charFilters) {
|
||||||
|
if (charFilter instanceof MultiTermAwareComponent) {
|
||||||
|
charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
|
||||||
|
reader = charFilter.create(reader);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return reader;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TokenStream normalize(String fieldName, TokenStream in) {
|
||||||
|
TokenStream result = in;
|
||||||
|
for (TokenFilterFactory filter : tokenFilters) {
|
||||||
|
if (filter instanceof MultiTermAwareComponent) {
|
||||||
|
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
|
||||||
|
result = filter.create(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A custom normalizer that is built out of a char and token filters. On the
|
||||||
|
* contrary to analyzers, it does not support tokenizers and only supports a
|
||||||
|
* subset of char and token filters.
|
||||||
|
*/
|
||||||
|
public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvider<CustomAnalyzer> {
|
||||||
|
|
||||||
|
private final Settings analyzerSettings;
|
||||||
|
|
||||||
|
private CustomAnalyzer customAnalyzer;
|
||||||
|
|
||||||
|
public CustomNormalizerProvider(IndexSettings indexSettings,
|
||||||
|
String name, Settings settings) {
|
||||||
|
super(indexSettings, name, settings);
|
||||||
|
this.analyzerSettings = settings;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void build(final Map<String, CharFilterFactory> charFilters, final Map<String, TokenFilterFactory> tokenFilters) {
|
||||||
|
String tokenizerName = analyzerSettings.get("tokenizer");
|
||||||
|
if (tokenizerName != null) {
|
||||||
|
throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<CharFilterFactory> charFiltersList = new ArrayList<>();
|
||||||
|
String[] charFilterNames = analyzerSettings.getAsArray("char_filter");
|
||||||
|
for (String charFilterName : charFilterNames) {
|
||||||
|
CharFilterFactory charFilter = charFilters.get(charFilterName);
|
||||||
|
if (charFilter == null) {
|
||||||
|
throw new IllegalArgumentException("Custom normalizer [" + name() + "] failed to find char_filter under name ["
|
||||||
|
+ charFilterName + "]");
|
||||||
|
}
|
||||||
|
if (charFilter instanceof MultiTermAwareComponent == false) {
|
||||||
|
throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use char filter ["
|
||||||
|
+ charFilterName + "]");
|
||||||
|
}
|
||||||
|
charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
|
||||||
|
charFiltersList.add(charFilter);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<TokenFilterFactory> tokenFilterList = new ArrayList<>();
|
||||||
|
String[] tokenFilterNames = analyzerSettings.getAsArray("filter");
|
||||||
|
for (String tokenFilterName : tokenFilterNames) {
|
||||||
|
TokenFilterFactory tokenFilter = tokenFilters.get(tokenFilterName);
|
||||||
|
if (tokenFilter == null) {
|
||||||
|
throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find filter under name ["
|
||||||
|
+ tokenFilterName + "]");
|
||||||
|
}
|
||||||
|
if (tokenFilter instanceof MultiTermAwareComponent == false) {
|
||||||
|
throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use filter [" + tokenFilterName + "]");
|
||||||
|
}
|
||||||
|
tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter).getMultiTermComponent();
|
||||||
|
tokenFilterList.add(tokenFilter);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.customAnalyzer = new CustomAnalyzer(
|
||||||
|
PreBuiltTokenizers.KEYWORD.getTokenizerFactory(indexSettings.getIndexVersionCreated()),
|
||||||
|
charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
|
||||||
|
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CustomAnalyzer get() {
|
||||||
|
return this.customAnalyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -25,6 +25,7 @@ import org.elasticsearch.index.IndexSettings;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IndexAnalyzers contains a name to analyzer mapping for a specific index.
|
* IndexAnalyzers contains a name to analyzer mapping for a specific index.
|
||||||
|
@ -38,15 +39,18 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Clos
|
||||||
private final NamedAnalyzer defaultSearchAnalyzer;
|
private final NamedAnalyzer defaultSearchAnalyzer;
|
||||||
private final NamedAnalyzer defaultSearchQuoteAnalyzer;
|
private final NamedAnalyzer defaultSearchQuoteAnalyzer;
|
||||||
private final Map<String, NamedAnalyzer> analyzers;
|
private final Map<String, NamedAnalyzer> analyzers;
|
||||||
|
private final Map<String, NamedAnalyzer> normalizers;
|
||||||
private final IndexSettings indexSettings;
|
private final IndexSettings indexSettings;
|
||||||
|
|
||||||
public IndexAnalyzers(IndexSettings indexSettings, NamedAnalyzer defaultIndexAnalyzer, NamedAnalyzer defaultSearchAnalyzer,
|
public IndexAnalyzers(IndexSettings indexSettings, NamedAnalyzer defaultIndexAnalyzer, NamedAnalyzer defaultSearchAnalyzer,
|
||||||
NamedAnalyzer defaultSearchQuoteAnalyzer, Map<String, NamedAnalyzer> analyzers) {
|
NamedAnalyzer defaultSearchQuoteAnalyzer, Map<String, NamedAnalyzer> analyzers,
|
||||||
|
Map<String, NamedAnalyzer> normalizers) {
|
||||||
super(indexSettings);
|
super(indexSettings);
|
||||||
this.defaultIndexAnalyzer = defaultIndexAnalyzer;
|
this.defaultIndexAnalyzer = defaultIndexAnalyzer;
|
||||||
this.defaultSearchAnalyzer = defaultSearchAnalyzer;
|
this.defaultSearchAnalyzer = defaultSearchAnalyzer;
|
||||||
this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer;
|
this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer;
|
||||||
this.analyzers = analyzers;
|
this.analyzers = analyzers;
|
||||||
|
this.normalizers = normalizers;
|
||||||
this.indexSettings = indexSettings;
|
this.indexSettings = indexSettings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,6 +61,12 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Clos
|
||||||
return analyzers.get(name);
|
return analyzers.get(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a normalizer mapped to the given name or <code>null</code> if not present
|
||||||
|
*/
|
||||||
|
public NamedAnalyzer getNormalizer(String name) {
|
||||||
|
return normalizers.get(name);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the default index analyzer for this index
|
* Returns the default index analyzer for this index
|
||||||
|
@ -81,7 +91,7 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Clos
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
IOUtils.close(() -> analyzers.values().stream()
|
IOUtils.close(() -> Stream.concat(analyzers.values().stream(), normalizers.values().stream())
|
||||||
.filter(a -> a.scope() == AnalyzerScope.INDEX)
|
.filter(a -> a.scope() == AnalyzerScope.INDEX)
|
||||||
.iterator());
|
.iterator());
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,16 +19,20 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.mapper;
|
package org.elasticsearch.index.mapper;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
||||||
|
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||||
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
|
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
|
||||||
|
|
||||||
|
@ -36,6 +40,7 @@ import java.io.IOException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import static org.elasticsearch.index.mapper.TypeParsers.parseField;
|
import static org.elasticsearch.index.mapper.TypeParsers.parseField;
|
||||||
|
|
||||||
|
@ -70,6 +75,11 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
builder = this;
|
builder = this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public KeywordFieldType fieldType() {
|
||||||
|
return (KeywordFieldType) super.fieldType();
|
||||||
|
}
|
||||||
|
|
||||||
public Builder ignoreAbove(int ignoreAbove) {
|
public Builder ignoreAbove(int ignoreAbove) {
|
||||||
if (ignoreAbove < 0) {
|
if (ignoreAbove < 0) {
|
||||||
throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
|
throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
|
||||||
|
@ -92,6 +102,12 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Builder normalizer(NamedAnalyzer normalizer) {
|
||||||
|
fieldType().setNormalizer(normalizer);
|
||||||
|
fieldType().setSearchAnalyzer(normalizer);
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public KeywordFieldMapper build(BuilderContext context) {
|
public KeywordFieldMapper build(BuilderContext context) {
|
||||||
setupFieldType(context);
|
setupFieldType(context);
|
||||||
|
@ -103,7 +119,7 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
|
|
||||||
public static class TypeParser implements Mapper.TypeParser {
|
public static class TypeParser implements Mapper.TypeParser {
|
||||||
@Override
|
@Override
|
||||||
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
|
public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
|
||||||
KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder(name);
|
KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder(name);
|
||||||
parseField(builder, name, node, parserContext);
|
parseField(builder, name, node, parserContext);
|
||||||
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
|
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
|
||||||
|
@ -125,6 +141,15 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
} else if (propName.equals("eager_global_ordinals")) {
|
} else if (propName.equals("eager_global_ordinals")) {
|
||||||
builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode));
|
builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode));
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
|
} else if (propName.equals("normalizer")) {
|
||||||
|
if (propNode != null) {
|
||||||
|
NamedAnalyzer normalizer = parserContext.getIndexAnalyzers().getNormalizer(propNode.toString());
|
||||||
|
if (normalizer == null) {
|
||||||
|
throw new MapperParsingException("normalizer [" + propNode.toString() + "] not found for field [" + name + "]");
|
||||||
|
}
|
||||||
|
builder.normalizer(normalizer);
|
||||||
|
}
|
||||||
|
iterator.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return builder;
|
return builder;
|
||||||
|
@ -133,21 +158,58 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
|
|
||||||
public static final class KeywordFieldType extends StringFieldType {
|
public static final class KeywordFieldType extends StringFieldType {
|
||||||
|
|
||||||
public KeywordFieldType() {}
|
private NamedAnalyzer normalizer = null;
|
||||||
|
|
||||||
|
public KeywordFieldType() {
|
||||||
|
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
|
||||||
|
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
|
||||||
|
}
|
||||||
|
|
||||||
protected KeywordFieldType(KeywordFieldType ref) {
|
protected KeywordFieldType(KeywordFieldType ref) {
|
||||||
super(ref);
|
super(ref);
|
||||||
|
this.normalizer = ref.normalizer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public KeywordFieldType clone() {
|
public KeywordFieldType clone() {
|
||||||
return new KeywordFieldType(this);
|
return new KeywordFieldType(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (super.equals(o) == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return Objects.equals(normalizer, ((KeywordFieldType) o).normalizer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void checkCompatibility(MappedFieldType otherFT, List<String> conflicts, boolean strict) {
|
||||||
|
super.checkCompatibility(otherFT, conflicts, strict);
|
||||||
|
KeywordFieldType other = (KeywordFieldType) otherFT;
|
||||||
|
if (Objects.equals(normalizer, other.normalizer) == false) {
|
||||||
|
conflicts.add("mapper [" + name() + "] has different [normalizer]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 31 * super.hashCode() + Objects.hashCode(normalizer);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String typeName() {
|
public String typeName() {
|
||||||
return CONTENT_TYPE;
|
return CONTENT_TYPE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public NamedAnalyzer normalizer() {
|
||||||
|
return normalizer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNormalizer(NamedAnalyzer normalizer) {
|
||||||
|
checkIfFrozen();
|
||||||
|
this.normalizer = normalizer;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Query nullValueQuery() {
|
public Query nullValueQuery() {
|
||||||
if (nullValue() == null) {
|
if (nullValue() == null) {
|
||||||
|
@ -171,13 +233,25 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
BytesRef binaryValue = (BytesRef) value;
|
BytesRef binaryValue = (BytesRef) value;
|
||||||
return binaryValue.utf8ToString();
|
return binaryValue.utf8ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected BytesRef indexedValueForSearch(Object value) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (value instanceof BytesRef) {
|
||||||
|
value = ((BytesRef) value).utf8ToString();
|
||||||
|
}
|
||||||
|
return searchAnalyzer().normalize(name(), value.toString());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Boolean includeInAll;
|
private Boolean includeInAll;
|
||||||
private int ignoreAbove;
|
private int ignoreAbove;
|
||||||
|
|
||||||
protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
||||||
int ignoreAbove, Boolean includeInAll, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
|
int ignoreAbove, Boolean includeInAll,
|
||||||
|
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
|
||||||
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
|
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
|
||||||
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
|
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
|
||||||
this.ignoreAbove = ignoreAbove;
|
this.ignoreAbove = ignoreAbove;
|
||||||
|
@ -196,6 +270,11 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
return (KeywordFieldMapper) super.clone();
|
return (KeywordFieldMapper) super.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public KeywordFieldType fieldType() {
|
||||||
|
return (KeywordFieldType) super.fieldType();
|
||||||
|
}
|
||||||
|
|
||||||
// pkg-private for testing
|
// pkg-private for testing
|
||||||
Boolean includeInAll() {
|
Boolean includeInAll() {
|
||||||
return includeInAll;
|
return includeInAll;
|
||||||
|
@ -203,7 +282,7 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
|
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
|
||||||
final String value;
|
String value;
|
||||||
if (context.externalValueSet()) {
|
if (context.externalValueSet()) {
|
||||||
value = context.externalValue().toString();
|
value = context.externalValue().toString();
|
||||||
} else {
|
} else {
|
||||||
|
@ -219,6 +298,27 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final NamedAnalyzer normalizer = fieldType().normalizer();
|
||||||
|
if (normalizer != null) {
|
||||||
|
try (final TokenStream ts = normalizer.tokenStream(name(), value)) {
|
||||||
|
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
ts.reset();
|
||||||
|
if (ts.incrementToken() == false) {
|
||||||
|
throw new IllegalStateException("The normalization token stream is "
|
||||||
|
+ "expected to produce exactly 1 token, but got 0 for analyzer "
|
||||||
|
+ normalizer + " and input \"" + value + "\"");
|
||||||
|
}
|
||||||
|
final String newValue = termAtt.toString();
|
||||||
|
if (ts.incrementToken()) {
|
||||||
|
throw new IllegalStateException("The normalization token stream is "
|
||||||
|
+ "expected to produce exactly 1 token, but got 2+ for analyzer "
|
||||||
|
+ normalizer + " and input \"" + value + "\"");
|
||||||
|
}
|
||||||
|
ts.end();
|
||||||
|
value = newValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (context.includeInAll(includeInAll, this)) {
|
if (context.includeInAll(includeInAll, this)) {
|
||||||
context.allEntries().addText(fieldType().name(), value, fieldType().boost());
|
context.allEntries().addText(fieldType().name(), value, fieldType().boost());
|
||||||
}
|
}
|
||||||
|
@ -263,5 +363,11 @@ public final class KeywordFieldMapper extends FieldMapper {
|
||||||
if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
|
if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
|
||||||
builder.field("ignore_above", ignoreAbove);
|
builder.field("ignore_above", ignoreAbove);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fieldType().normalizer() != null) {
|
||||||
|
builder.field("normalizer", fieldType().normalizer().name());
|
||||||
|
} else if (includeDefaults) {
|
||||||
|
builder.nullField("normalizer");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -170,8 +170,9 @@ public final class AnalysisModule {
|
||||||
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService);
|
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService);
|
||||||
NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
|
NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
|
||||||
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
|
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
|
||||||
|
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins);
|
||||||
analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
|
analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
|
||||||
.getRegistry(), analyzers.getRegistry());
|
.getRegistry(), analyzers.getRegistry(), normalizers.getRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
HunspellService getHunspellService() {
|
HunspellService getHunspellService() {
|
||||||
|
@ -334,6 +335,13 @@ public final class AnalysisModule {
|
||||||
return analyzers;
|
return analyzers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> setupNormalizers(List<AnalysisPlugin> plugins) {
|
||||||
|
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = new NamedRegistry<>("normalizer");
|
||||||
|
// TODO: provide built-in normalizer providers?
|
||||||
|
// TODO: pluggability?
|
||||||
|
return normalizers;
|
||||||
|
}
|
||||||
|
|
||||||
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
|
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
|
||||||
return new AnalysisModule.AnalysisProvider<T>() {
|
return new AnalysisModule.AnalysisProvider<T>() {
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -435,7 +435,7 @@ public class GatewayIndexStateIT extends ESIntegTestCase {
|
||||||
assertEquals(ex.getMessage(), "Failed to verify index " + metaData.getIndex());
|
assertEquals(ex.getMessage(), "Failed to verify index " + metaData.getIndex());
|
||||||
assertNotNull(ex.getCause());
|
assertNotNull(ex.getCause());
|
||||||
assertEquals(IllegalArgumentException.class, ex.getCause().getClass());
|
assertEquals(IllegalArgumentException.class, ex.getCause().getClass());
|
||||||
assertEquals(ex.getCause().getMessage(), "Unknown tokenfilter type [icu_collation] for [myCollator]");
|
assertEquals(ex.getCause().getMessage(), "Unknown filter type [icu_collation] for [myCollator]");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -148,7 +148,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
|
|
||||||
public void testWrapperIsBound() throws IOException {
|
public void testWrapperIsBound() throws IOException {
|
||||||
IndexModule module = new IndexModule(indexSettings,
|
IndexModule module = new IndexModule(indexSettings,
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.setSearcherWrapper((s) -> new Wrapper());
|
module.setSearcherWrapper((s) -> new Wrapper());
|
||||||
module.engineFactory.set(new MockEngineFactory(AssertingDirectoryReader.class));
|
module.engineFactory.set(new MockEngineFactory(AssertingDirectoryReader.class));
|
||||||
|
|
||||||
|
@ -168,7 +168,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.build();
|
.build();
|
||||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
|
||||||
IndexModule module = new IndexModule(indexSettings,
|
IndexModule module = new IndexModule(indexSettings,
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.addIndexStore("foo_store", FooStore::new);
|
module.addIndexStore("foo_store", FooStore::new);
|
||||||
try {
|
try {
|
||||||
module.addIndexStore("foo_store", FooStore::new);
|
module.addIndexStore("foo_store", FooStore::new);
|
||||||
|
@ -193,7 +193,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
};
|
};
|
||||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
|
||||||
IndexModule module = new IndexModule(indexSettings,
|
IndexModule module = new IndexModule(indexSettings,
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.addIndexEventListener(eventListener);
|
module.addIndexEventListener(eventListener);
|
||||||
IndexService indexService = newIndexService(module);
|
IndexService indexService = newIndexService(module);
|
||||||
IndexSettings x = indexService.getIndexSettings();
|
IndexSettings x = indexService.getIndexSettings();
|
||||||
|
@ -208,7 +208,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
public void testListener() throws IOException {
|
public void testListener() throws IOException {
|
||||||
Setting<Boolean> booleanSetting = Setting.boolSetting("index.foo.bar", false, Property.Dynamic, Property.IndexScope);
|
Setting<Boolean> booleanSetting = Setting.boolSetting("index.foo.bar", false, Property.Dynamic, Property.IndexScope);
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings, booleanSetting),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings, booleanSetting),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
Setting<Boolean> booleanSetting2 = Setting.boolSetting("index.foo.bar.baz", false, Property.Dynamic, Property.IndexScope);
|
Setting<Boolean> booleanSetting2 = Setting.boolSetting("index.foo.bar.baz", false, Property.Dynamic, Property.IndexScope);
|
||||||
AtomicBoolean atomicBoolean = new AtomicBoolean(false);
|
AtomicBoolean atomicBoolean = new AtomicBoolean(false);
|
||||||
module.addSettingsUpdateConsumer(booleanSetting, atomicBoolean::set);
|
module.addSettingsUpdateConsumer(booleanSetting, atomicBoolean::set);
|
||||||
|
@ -228,7 +228,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
|
|
||||||
public void testAddIndexOperationListener() throws IOException {
|
public void testAddIndexOperationListener() throws IOException {
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
AtomicBoolean executed = new AtomicBoolean(false);
|
AtomicBoolean executed = new AtomicBoolean(false);
|
||||||
IndexingOperationListener listener = new IndexingOperationListener() {
|
IndexingOperationListener listener = new IndexingOperationListener() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -257,7 +257,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
|
|
||||||
public void testAddSearchOperationListener() throws IOException {
|
public void testAddSearchOperationListener() throws IOException {
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
AtomicBoolean executed = new AtomicBoolean(false);
|
AtomicBoolean executed = new AtomicBoolean(false);
|
||||||
SearchOperationListener listener = new SearchOperationListener() {
|
SearchOperationListener listener = new SearchOperationListener() {
|
||||||
|
|
||||||
|
@ -291,7 +291,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.addSimilarity("test_similarity", (string, settings) -> new SimilarityProvider() {
|
module.addSimilarity("test_similarity", (string, settings) -> new SimilarityProvider() {
|
||||||
@Override
|
@Override
|
||||||
public String name() {
|
public String name() {
|
||||||
|
@ -315,7 +315,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
|
|
||||||
public void testFrozen() {
|
public void testFrozen() {
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.freeze();
|
module.freeze();
|
||||||
String msg = "Can't modify IndexModule once the index service has been created";
|
String msg = "Can't modify IndexModule once the index service has been created";
|
||||||
assertEquals(msg, expectThrows(IllegalStateException.class, () -> module.addSearchOperationListener(null)).getMessage());
|
assertEquals(msg, expectThrows(IllegalStateException.class, () -> module.addSearchOperationListener(null)).getMessage());
|
||||||
|
@ -334,7 +334,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
|
Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
|
||||||
assertEquals("Unknown Similarity type [test_similarity] for [my_similarity]", ex.getMessage());
|
assertEquals("Unknown Similarity type [test_similarity] for [my_similarity]", ex.getMessage());
|
||||||
}
|
}
|
||||||
|
@ -346,7 +346,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||||
.build();
|
.build();
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
|
Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
|
||||||
assertEquals("Similarity [my_similarity] must have an associated type", ex.getMessage());
|
assertEquals("Similarity [my_similarity] must have an associated type", ex.getMessage());
|
||||||
}
|
}
|
||||||
|
@ -356,7 +356,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
|
module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
|
||||||
expectThrows(AlreadySetException.class, () -> module.forceQueryCacheProvider((a, b) -> new CustomQueryCache()));
|
expectThrows(AlreadySetException.class, () -> module.forceQueryCacheProvider((a, b) -> new CustomQueryCache()));
|
||||||
IndexService indexService = newIndexService(module);
|
IndexService indexService = newIndexService(module);
|
||||||
|
@ -369,7 +369,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
IndexService indexService = newIndexService(module);
|
IndexService indexService = newIndexService(module);
|
||||||
assertTrue(indexService.cache().query() instanceof IndexQueryCache);
|
assertTrue(indexService.cache().query() instanceof IndexQueryCache);
|
||||||
indexService.close("simon says", false);
|
indexService.close("simon says", false);
|
||||||
|
@ -381,7 +381,7 @@ public class IndexModuleTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
||||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
|
||||||
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
|
module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
|
||||||
IndexService indexService = newIndexService(module);
|
IndexService indexService = newIndexService(module);
|
||||||
assertTrue(indexService.cache().query() instanceof DisabledQueryCache);
|
assertTrue(indexService.cache().query() instanceof DisabledQueryCache);
|
||||||
|
|
|
@ -65,7 +65,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
registry = new AnalysisRegistry(new Environment(settings),
|
registry = new AnalysisRegistry(new Environment(settings),
|
||||||
emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDefaultAnalyzers() throws IOException {
|
public void testDefaultAnalyzers() throws IOException {
|
||||||
|
@ -76,7 +76,8 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
||||||
IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap())
|
IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
|
||||||
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
|
||||||
.build(idxSettings);
|
.build(idxSettings);
|
||||||
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
||||||
|
@ -88,7 +89,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
|
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
|
||||||
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
||||||
singletonMap("default", analyzerProvider("default"))
|
singletonMap("default", analyzerProvider("default"))
|
||||||
, emptyMap(), emptyMap(), emptyMap());
|
, emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
|
@ -100,7 +101,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
AnalyzerProvider<?> defaultIndex = new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer());
|
AnalyzerProvider<?> defaultIndex = new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer());
|
||||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
() -> registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
() -> registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
||||||
singletonMap("default_index", defaultIndex), emptyMap(), emptyMap(), emptyMap()));
|
singletonMap("default_index", defaultIndex), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
|
||||||
assertTrue(e.getMessage().contains("[index.analysis.analyzer.default_index] is not supported"));
|
assertTrue(e.getMessage().contains("[index.analysis.analyzer.default_index] is not supported"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,7 +110,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
VersionUtils.getPreviousVersion(Version.V_5_0_0_alpha1));
|
VersionUtils.getPreviousVersion(Version.V_5_0_0_alpha1));
|
||||||
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
|
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
|
||||||
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
||||||
singletonMap("default_index", analyzerProvider("default_index")), emptyMap(), emptyMap(), emptyMap());
|
singletonMap("default_index", analyzerProvider("default_index")), emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
||||||
|
@ -121,7 +122,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
Version version = VersionUtils.randomVersion(random());
|
Version version = VersionUtils.randomVersion(random());
|
||||||
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
|
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
|
||||||
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
||||||
singletonMap("default_search", analyzerProvider("default_search")), emptyMap(), emptyMap(), emptyMap());
|
singletonMap("default_search", analyzerProvider("default_search")), emptyMap(), emptyMap(), emptyMap(), emptyMap());
|
||||||
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
|
@ -135,7 +136,7 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
analyzers.put("default_index", analyzerProvider("default_index"));
|
analyzers.put("default_index", analyzerProvider("default_index"));
|
||||||
analyzers.put("default_search", analyzerProvider("default_search"));
|
analyzers.put("default_search", analyzerProvider("default_search"));
|
||||||
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
|
||||||
analyzers, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
|
analyzers, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
|
||||||
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
|
||||||
|
@ -196,10 +197,11 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
Settings indexSettings = Settings.builder()
|
Settings indexSettings = Settings.builder()
|
||||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
||||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
|
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
|
||||||
IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap())
|
IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
|
||||||
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
|
||||||
.build(idxSettings);
|
.build(idxSettings);
|
||||||
IndexAnalyzers otherIndexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(),
|
IndexAnalyzers otherIndexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(),
|
||||||
emptyMap()).build(idxSettings);
|
emptyMap(), emptyMap()).build(idxSettings);
|
||||||
final int numIters = randomIntBetween(5, 20);
|
final int numIters = randomIntBetween(5, 20);
|
||||||
for (int i = 0; i < numIters; i++) {
|
for (int i = 0; i < numIters; i++) {
|
||||||
PreBuiltAnalyzers preBuiltAnalyzers = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values());
|
PreBuiltAnalyzers preBuiltAnalyzers = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values());
|
||||||
|
@ -219,7 +221,8 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
||||||
|
|
||||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
() -> new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap()).build(idxSettings));
|
() -> new AnalysisRegistry(new Environment(settings),
|
||||||
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()).build(idxSettings));
|
||||||
assertThat(e.getMessage(), equalTo("analyzer [test_analyzer] must specify either an analyzer type, or a tokenizer"));
|
assertThat(e.getMessage(), equalTo("analyzer [test_analyzer] must specify either an analyzer type, or a tokenizer"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -228,7 +231,8 @@ public class AnalysisRegistryTests extends ESTestCase {
|
||||||
Settings indexSettings = Settings.builder()
|
Settings indexSettings = Settings.builder()
|
||||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
||||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
|
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
|
||||||
IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap())
|
IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
|
||||||
|
emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
|
||||||
.build(idxSettings);
|
.build(idxSettings);
|
||||||
indexAnalyzers.close();
|
indexAnalyzers.close();
|
||||||
indexAnalyzers.close();
|
indexAnalyzers.close();
|
||||||
|
|
|
@ -0,0 +1,102 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.env.Environment;
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
import org.elasticsearch.test.ESTokenStreamTestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class CustomNormalizerTests extends ESTokenStreamTestCase {
|
||||||
|
|
||||||
|
public void testBasics() throws IOException {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding")
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||||
|
assertNull(analysis.indexAnalyzers.get("my_normalizer"));
|
||||||
|
NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
|
||||||
|
assertNotNull(normalizer);
|
||||||
|
assertEquals("my_normalizer", normalizer.name());
|
||||||
|
assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet ete-la"});
|
||||||
|
assertEquals(new BytesRef("cet ete-la"), normalizer.normalize("foo", "Cet été-là"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnknownType() {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.put("index.analysis.normalizer.my_normalizer.type", "foobar")
|
||||||
|
.putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding")
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
|
||||||
|
assertEquals("Unknown normalizer type [foobar] for [my_normalizer]", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTokenizer() throws IOException {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.put("index.analysis.normalizer.my_normalizer.tokenizer", "keyword")
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
|
||||||
|
assertEquals("Custom normalizer [my_normalizer] cannot configure a tokenizer", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCharFilters() throws IOException {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.put("index.analysis.char_filter.my_mapping.type", "mapping")
|
||||||
|
.putArray("index.analysis.char_filter.my_mapping.mappings", "a => z")
|
||||||
|
.putArray("index.analysis.normalizer.my_normalizer.char_filter", "my_mapping")
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||||
|
assertNull(analysis.indexAnalyzers.get("my_normalizer"));
|
||||||
|
NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
|
||||||
|
assertNotNull(normalizer);
|
||||||
|
assertEquals("my_normalizer", normalizer.name());
|
||||||
|
assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] {"zbc"});
|
||||||
|
assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIllegalFilters() throws IOException {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.putArray("index.analysis.normalizer.my_normalizer.filter", "porter_stem")
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
|
||||||
|
assertEquals("Custom normalizer [my_normalizer] may not use filter [porter_stem]", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIllegalCharFilters() throws IOException {
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.putArray("index.analysis.normalizer.my_normalizer.char_filter", "html_strip")
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
|
.build();
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
|
||||||
|
assertEquals("Custom normalizer [my_normalizer] may not use char filter [html_strip]", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
|
@ -2313,7 +2313,7 @@ public class InternalEngineTests extends ESTestCase {
|
||||||
Index index = new Index(indexName, "_na_");
|
Index index = new Index(indexName, "_na_");
|
||||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
|
||||||
NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
|
NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
|
||||||
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, Collections.emptyMap());
|
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, Collections.emptyMap(), Collections.emptyMap());
|
||||||
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||||
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
|
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
|
||||||
mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,
|
mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,
|
||||||
|
|
|
@ -25,8 +25,10 @@ import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.index.IndexableFieldType;
|
import org.apache.lucene.index.IndexableFieldType;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.common.compress.CompressedXContent;
|
import org.elasticsearch.common.compress.CompressedXContent;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
|
import org.elasticsearch.index.mapper.MapperService.MergeReason;
|
||||||
import org.elasticsearch.plugins.Plugin;
|
import org.elasticsearch.plugins.Plugin;
|
||||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
import org.elasticsearch.test.InternalSettingsPlugin;
|
import org.elasticsearch.test.InternalSettingsPlugin;
|
||||||
|
@ -51,7 +53,11 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() {
|
public void setup() {
|
||||||
indexService = createIndex("test");
|
indexService = createIndex("test", Settings.builder()
|
||||||
|
.put("index.analysis.normalizer.my_lowercase.type", "custom")
|
||||||
|
.putArray("index.analysis.normalizer.my_lowercase.filter", "lowercase")
|
||||||
|
.put("index.analysis.normalizer.my_asciifolding.type", "custom")
|
||||||
|
.putArray("index.analysis.normalizer.my_asciifolding.filter", "asciifolding").build());
|
||||||
parser = indexService.mapperService().documentMapperParser();
|
parser = indexService.mapperService().documentMapperParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -283,6 +289,62 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||||
assertFalse(fields[0].fieldType().omitNorms());
|
assertFalse(fields[0].fieldType().omitNorms());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNormalizer() throws IOException {
|
||||||
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("properties").startObject("field")
|
||||||
|
.field("type", "keyword").field("normalizer", "my_lowercase").endObject().endObject()
|
||||||
|
.endObject().endObject().string();
|
||||||
|
|
||||||
|
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||||
|
|
||||||
|
assertEquals(mapping, mapper.mappingSource().toString());
|
||||||
|
|
||||||
|
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
|
||||||
|
.startObject()
|
||||||
|
.field("field", "AbC")
|
||||||
|
.endObject()
|
||||||
|
.bytes());
|
||||||
|
|
||||||
|
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||||
|
assertEquals(2, fields.length);
|
||||||
|
|
||||||
|
assertEquals(new BytesRef("abc"), fields[0].binaryValue());
|
||||||
|
IndexableFieldType fieldType = fields[0].fieldType();
|
||||||
|
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||||
|
assertFalse(fieldType.tokenized());
|
||||||
|
assertFalse(fieldType.stored());
|
||||||
|
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
|
||||||
|
assertThat(fieldType.storeTermVectors(), equalTo(false));
|
||||||
|
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
|
||||||
|
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
|
||||||
|
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||||
|
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||||
|
|
||||||
|
assertEquals(new BytesRef("abc"), fields[1].binaryValue());
|
||||||
|
fieldType = fields[1].fieldType();
|
||||||
|
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||||
|
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUpdateNormalizer() throws IOException {
|
||||||
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("properties").startObject("field")
|
||||||
|
.field("type", "keyword").field("normalizer", "my_lowercase").endObject().endObject()
|
||||||
|
.endObject().endObject().string();
|
||||||
|
indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, randomBoolean());
|
||||||
|
|
||||||
|
String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("properties").startObject("field")
|
||||||
|
.field("type", "keyword").field("normalizer", "my_asciifolding").endObject().endObject()
|
||||||
|
.endObject().endObject().string();
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> indexService.mapperService().merge("type",
|
||||||
|
new CompressedXContent(mapping2), MergeReason.MAPPING_UPDATE, randomBoolean()));
|
||||||
|
assertEquals(
|
||||||
|
"Mapper for [field] conflicts with existing mapping in other types:\n[mapper [field] has different [normalizer]]",
|
||||||
|
e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
public void testEmptyName() throws IOException {
|
public void testEmptyName() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject()
|
String mapping = XContentFactory.jsonBuilder().startObject()
|
||||||
.startObject("type")
|
.startObject("type")
|
||||||
|
|
|
@ -20,22 +20,41 @@ package org.elasticsearch.index.mapper;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.TermsQuery;
|
import org.apache.lucene.queries.TermsQuery;
|
||||||
import org.apache.lucene.search.FuzzyQuery;
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.search.RegexpQuery;
|
import org.apache.lucene.search.RegexpQuery;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.unit.Fuzziness;
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
import org.elasticsearch.index.analysis.AnalyzerScope;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||||
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
|
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
||||||
|
import org.junit.Before;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class KeywordFieldTypeTests extends FieldTypeTestCase {
|
public class KeywordFieldTypeTests extends FieldTypeTestCase {
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setupProperties() {
|
||||||
|
addModifier(new Modifier("normalizer", false) {
|
||||||
|
@Override
|
||||||
|
public void modify(MappedFieldType ft) {
|
||||||
|
((KeywordFieldType) ft).setNormalizer(Lucene.KEYWORD_ANALYZER);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected MappedFieldType createDefaultFieldType() {
|
protected MappedFieldType createDefaultFieldType() {
|
||||||
return new KeywordFieldMapper.KeywordFieldType();
|
return new KeywordFieldMapper.KeywordFieldType();
|
||||||
|
@ -62,6 +81,31 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
|
||||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTermQueryWithNormalizer() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
Analyzer normalizer = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
|
Tokenizer in = new WhitespaceTokenizer();
|
||||||
|
TokenFilter out = new LowerCaseFilter(in);
|
||||||
|
return new TokenStreamComponents(in, out);
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
protected TokenStream normalize(String fieldName, TokenStream in) {
|
||||||
|
return new LowerCaseFilter(in);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
ft.setSearchAnalyzer(new NamedAnalyzer("my_normalizer", AnalyzerScope.INDEX, normalizer));
|
||||||
|
assertEquals(new TermQuery(new Term("field", "foo bar")), ft.termQuery("fOo BaR", null));
|
||||||
|
|
||||||
|
ft.setIndexOptions(IndexOptions.NONE);
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> ft.termQuery("bar", null));
|
||||||
|
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
public void testTermsQuery() {
|
public void testTermsQuery() {
|
||||||
MappedFieldType ft = createDefaultFieldType();
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
ft.setName("field");
|
ft.setName("field");
|
||||||
|
|
|
@ -101,7 +101,7 @@ public class ParentFieldMapperTests extends ESSingleNodeTestCase {
|
||||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, Settings.EMPTY);
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, Settings.EMPTY);
|
||||||
NamedAnalyzer namedAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
|
NamedAnalyzer namedAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
|
||||||
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, namedAnalyzer, namedAnalyzer, namedAnalyzer,
|
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, namedAnalyzer, namedAnalyzer, namedAnalyzer,
|
||||||
Collections.emptyMap());
|
Collections.emptyMap(), Collections.emptyMap());
|
||||||
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||||
MapperService mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry(), similarityService,
|
MapperService mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry(), similarityService,
|
||||||
new IndicesModule(emptyList()).getMapperRegistry(), () -> null);
|
new IndicesModule(emptyList()).getMapperRegistry(), () -> null);
|
||||||
|
|
|
@ -112,6 +112,8 @@ include::analysis/testing.asciidoc[]
|
||||||
|
|
||||||
include::analysis/analyzers.asciidoc[]
|
include::analysis/analyzers.asciidoc[]
|
||||||
|
|
||||||
|
include::analysis/normalizers.asciidoc[]
|
||||||
|
|
||||||
include::analysis/tokenizers.asciidoc[]
|
include::analysis/tokenizers.asciidoc[]
|
||||||
|
|
||||||
include::analysis/tokenfilters.asciidoc[]
|
include::analysis/tokenfilters.asciidoc[]
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
[[analysis-normalizers]]
|
||||||
|
== Normalizers
|
||||||
|
|
||||||
|
experimental[]
|
||||||
|
|
||||||
|
Normalizers are similar to analyzers except that they may only emit a single
|
||||||
|
token. As a consequence, they do not have a tokenizer and only accept a subset
|
||||||
|
of the available char filters and token filters. Only the filters that work on
|
||||||
|
a per-character basis are allowed. For instance a lowercasing filter would be
|
||||||
|
allowed, but not a stemming filter, which needs to look at the keyword as a
|
||||||
|
whole.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
=== Custom normalizers
|
||||||
|
|
||||||
|
Elasticsearch does not ship with built-in normalizers so far, so the only way
|
||||||
|
to get one is by building a custom one. Custom normalizers take a list of
|
||||||
|
<<analysis-charfilters, character filters>> and a list of
|
||||||
|
<<analysis-tokenfilters,token filters>>.
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------
|
||||||
|
PUT index
|
||||||
|
{
|
||||||
|
"settings": {
|
||||||
|
"analysis": {
|
||||||
|
"char_filter": {
|
||||||
|
"quote": {
|
||||||
|
"type": "mapping",
|
||||||
|
"mappings": [
|
||||||
|
"« => \"",
|
||||||
|
"» => \""
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"normalizer": {
|
||||||
|
"my_normalizer": {
|
||||||
|
"type": "custom",
|
||||||
|
"char_filter": ["quote"],
|
||||||
|
"filter": ["lowercase", "asciifolding"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": {
|
||||||
|
"type": {
|
||||||
|
"properties": {
|
||||||
|
"foo": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "my_normalizer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------
|
||||||
|
// CONSOLE
|
|
@ -8,6 +8,7 @@ parameters that are used by <<mapping-types,field mappings>>:
|
||||||
The following mapping parameters are common to some or all field datatypes:
|
The following mapping parameters are common to some or all field datatypes:
|
||||||
|
|
||||||
* <<analyzer,`analyzer`>>
|
* <<analyzer,`analyzer`>>
|
||||||
|
* <<normalizer, `normalizer`>>
|
||||||
* <<mapping-boost,`boost`>>
|
* <<mapping-boost,`boost`>>
|
||||||
* <<coerce,`coerce`>>
|
* <<coerce,`coerce`>>
|
||||||
* <<copy-to,`copy_to`>>
|
* <<copy-to,`copy_to`>>
|
||||||
|
@ -34,6 +35,8 @@ The following mapping parameters are common to some or all field datatypes:
|
||||||
|
|
||||||
include::params/analyzer.asciidoc[]
|
include::params/analyzer.asciidoc[]
|
||||||
|
|
||||||
|
include::params/normalizer.asciidoc[]
|
||||||
|
|
||||||
include::params/boost.asciidoc[]
|
include::params/boost.asciidoc[]
|
||||||
|
|
||||||
include::params/coerce.asciidoc[]
|
include::params/coerce.asciidoc[]
|
||||||
|
|
|
@ -0,0 +1,163 @@
|
||||||
|
[[normalizer]]
|
||||||
|
=== `normalizer`
|
||||||
|
|
||||||
|
The `normalizer` property of <<keyword,`keyword`>> fields is similar to
|
||||||
|
<<analyzer,`analyzer`>> except that it guarantees that the analysis chain
|
||||||
|
produces a single token.
|
||||||
|
|
||||||
|
The `normalizer` is applied prior to indexing the keyword, as well as at
|
||||||
|
search-time when the `keyword` field is searched via a query parser such as
|
||||||
|
the <<query-dsl-match-query,`match`>> query.
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------
|
||||||
|
PUT index
|
||||||
|
{
|
||||||
|
"settings": {
|
||||||
|
"analysis": {
|
||||||
|
"normalizer": {
|
||||||
|
"my_normalizer": {
|
||||||
|
"type": "custom",
|
||||||
|
"char_filter": [],
|
||||||
|
"filter": ["lowercase", "asciifolding"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": {
|
||||||
|
"type": {
|
||||||
|
"properties": {
|
||||||
|
"foo": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "my_normalizer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT index/type/1
|
||||||
|
{
|
||||||
|
"foo": "BÀR"
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT index/type/2
|
||||||
|
{
|
||||||
|
"foo": "bar"
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT index/type/3
|
||||||
|
{
|
||||||
|
"foo": "baz"
|
||||||
|
}
|
||||||
|
|
||||||
|
POST index/_refresh
|
||||||
|
|
||||||
|
GET index/_search
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"match": {
|
||||||
|
"foo": "BAR"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------
|
||||||
|
// CONSOLE
|
||||||
|
|
||||||
|
The above query matches documents 1 and 2 since `BÀR` is converted to `bar` at
|
||||||
|
both index and query time.
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
----------------------------
|
||||||
|
{
|
||||||
|
"took": $body.took,
|
||||||
|
"timed_out": false,
|
||||||
|
"_shards": {
|
||||||
|
"total": 5,
|
||||||
|
"successful": 5,
|
||||||
|
"failed": 0
|
||||||
|
},
|
||||||
|
"hits": {
|
||||||
|
"total": 2,
|
||||||
|
"max_score": 0.2876821,
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"_index": "index",
|
||||||
|
"_type": "type",
|
||||||
|
"_id": "2",
|
||||||
|
"_score": 0.2876821,
|
||||||
|
"_source": {
|
||||||
|
"foo": "bar"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_index": "index",
|
||||||
|
"_type": "type",
|
||||||
|
"_id": "1",
|
||||||
|
"_score": 0.2876821,
|
||||||
|
"_source": {
|
||||||
|
"foo": "BÀR"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
----------------------------
|
||||||
|
// TESTRESPONSE[s/"took".*/"took": "$body.took",/]
|
||||||
|
|
||||||
|
The fact that keywords are converted prior to indexing also means that
|
||||||
|
aggregations return normalized values:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
----------------------------
|
||||||
|
GET index/_search
|
||||||
|
{
|
||||||
|
"size": 0,
|
||||||
|
"aggs": {
|
||||||
|
"foo_terms": {
|
||||||
|
"terms": {
|
||||||
|
"field": "foo"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
----------------------------
|
||||||
|
// CONSOLE
|
||||||
|
// TEST[continued]
|
||||||
|
|
||||||
|
returns
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
----------------------------
|
||||||
|
{
|
||||||
|
"took": 43,
|
||||||
|
"timed_out": false,
|
||||||
|
"_shards": {
|
||||||
|
"total": 5,
|
||||||
|
"successful": 5,
|
||||||
|
"failed": 0
|
||||||
|
},
|
||||||
|
"hits": {
|
||||||
|
"total": 3,
|
||||||
|
"max_score": 0.0,
|
||||||
|
"hits": []
|
||||||
|
},
|
||||||
|
"aggregations": {
|
||||||
|
"foo_terms": {
|
||||||
|
"doc_count_error_upper_bound": 0,
|
||||||
|
"sum_other_doc_count": 0,
|
||||||
|
"buckets": [
|
||||||
|
{
|
||||||
|
"key": "bar",
|
||||||
|
"doc_count": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "baz",
|
||||||
|
"doc_count": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
----------------------------
|
||||||
|
// TESTRESPONSE[s/"took".*/"took": "$body.took",/]
|
|
@ -109,6 +109,12 @@ The following parameters are accepted by `keyword` fields:
|
||||||
Which scoring algorithm or _similarity_ should be used. Defaults
|
Which scoring algorithm or _similarity_ should be used. Defaults
|
||||||
to `classic`, which uses TF/IDF.
|
to `classic`, which uses TF/IDF.
|
||||||
|
|
||||||
|
<<normalizer,`normalizer`>>::
|
||||||
|
|
||||||
|
experimental[]
|
||||||
|
How to pre-process the keyword prior to indexing. Defaults to `null`,
|
||||||
|
meaning the keyword is kept as-is.
|
||||||
|
|
||||||
NOTE: Indexes imported from 2.x do not support `keyword`. Instead they will
|
NOTE: Indexes imported from 2.x do not support `keyword`. Instead they will
|
||||||
attempt to downgrade `keyword` into `string`. This allows you to merge modern
|
attempt to downgrade `keyword` into `string`. This allows you to merge modern
|
||||||
mappings with legacy mappings. Long lived indexes will have to be recreated
|
mappings with legacy mappings. Long lived indexes will have to be recreated
|
||||||
|
|
Loading…
Reference in New Issue