Add the ability to set an analyzer on keyword fields. (#21919)
This adds a new `normalizer` property to `keyword` fields that pre-processes the field value prior to indexing, but without altering the `_source`. Note that only the normalization components that work on a per-character basis are applied, so for instance stemming filters will be ignored while lowercasing or ASCII folding will be applied.

Closes #18064
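For illustration, here is roughly how the new property is used, pieced together from the tests added in this commit (a sketch; the normalizer name `my_lowercase` and the field name are arbitrary, and index creation itself is elided):

    import org.elasticsearch.common.settings.Settings;
    import org.elasticsearch.common.xcontent.XContentBuilder;
    import org.elasticsearch.common.xcontent.XContentFactory;

    // Index settings: declare a custom normalizer built only from
    // multi-term-aware (per-character) filters.
    Settings settings = Settings.builder()
            .put("index.analysis.normalizer.my_lowercase.type", "custom")
            .putArray("index.analysis.normalizer.my_lowercase.filter", "lowercase", "asciifolding")
            .build();

    // Mapping: point a keyword field at the normalizer. Indexed terms are
    // lowercased and ASCII-folded, while _source keeps the original value.
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject()
            .startObject("properties").startObject("field")
                .field("type", "keyword")
                .field("normalizer", "my_lowercase")
            .endObject().endObject()
        .endObject();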
parent 117b63ed41
commit 3f805d68cb
@@ -145,11 +145,10 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
                 @Override
                 public Set<Entry<String, NamedAnalyzer>> entrySet() {
-                    // just to ensure we can iterate over this single analzyer
-                    return Collections.singletonMap(fakeDefault.name(), fakeDefault).entrySet();
+                    return Collections.emptySet();
                 }
             };
-            try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap)) {
+            try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap)) {
                 MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalzyers, xContentRegistry, similarityService,
                     mapperRegistry, () -> null);
                 mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY, false);
@@ -67,17 +67,20 @@ public final class AnalysisRegistry implements Closeable {
     private final Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters;
     private final Map<String, AnalysisProvider<TokenizerFactory>> tokenizers;
     private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers;
+    private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers;

     public AnalysisRegistry(Environment environment,
                             Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
                             Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
                             Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
-                            Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers) {
+                            Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
+                            Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers) {
         this.environment = environment;
         this.charFilters = unmodifiableMap(charFilters);
         this.tokenFilters = unmodifiableMap(tokenFilters);
         this.tokenizers = unmodifiableMap(tokenizers);
         this.analyzers = unmodifiableMap(analyzers);
+        this.normalizers = unmodifiableMap(normalizers);
     }

     /**

@@ -151,7 +154,8 @@ public final class AnalysisRegistry implements Closeable {
         final Map<String, TokenizerFactory> tokenizerFactories = buildTokenizerFactories(indexSettings);
         final Map<String, TokenFilterFactory> tokenFilterFactories = buildTokenFilterFactories(indexSettings);
         final Map<String, AnalyzerProvider<?>> analyzierFactories = buildAnalyzerFactories(indexSettings);
-        return build(indexSettings, analyzierFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
+        final Map<String, AnalyzerProvider<?>> normalizerFactories = buildNormalizerFactories(indexSettings);
+        return build(indexSettings, analyzierFactories, normalizerFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
     }

     public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
@@ -164,22 +168,28 @@ public final class AnalysisRegistry implements Closeable {
          */
         tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
         tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings)));
-        return buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
+        return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
     }

     public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
         final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
-        return buildMapping(false, "tokenizer", indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.tokenizerFactories);
+        return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.tokenizerFactories);
     }

     public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
         final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
-        return buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
+        return buildMapping(Component.CHAR_FILTER, indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
     }

     public Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
         final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
-        return buildMapping(true, "analyzer", indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories);
+        return buildMapping(Component.ANALYZER, indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories);
     }

+    public Map<String, AnalyzerProvider<?>> buildNormalizerFactories(IndexSettings indexSettings) throws IOException {
+        final Map<String, Settings> noralizersSettings = indexSettings.getSettings().getGroups("index.analysis.normalizer");
+        // TODO: Have pre-built normalizers
+        return buildMapping(Component.NORMALIZER, indexSettings, noralizersSettings, normalizers, Collections.emptyMap());
+    }
+
     /**
@@ -194,7 +204,7 @@ public final class AnalysisRegistry implements Closeable {
         final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
         if (tokenizerSettings.containsKey(tokenizer)) {
             Settings currentSettings = tokenizerSettings.get(tokenizer);
-            return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
+            return getAnalysisProvider(Component.TOKENIZER, tokenizers, tokenizer, currentSettings.get("type"));
         } else {
             return getTokenizerProvider(tokenizer);
         }
@@ -223,7 +233,7 @@ public final class AnalysisRegistry implements Closeable {
             } else if ("synonym_graph".equals(typeName)) {
                 return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings));
             } else {
-                return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
+                return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
             }
         } else {
             return getTokenFilterProvider(tokenFilter);
@@ -242,7 +252,7 @@ public final class AnalysisRegistry implements Closeable {
         final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
         if (tokenFilterSettings.containsKey(charFilter)) {
             Settings currentSettings = tokenFilterSettings.get(charFilter);
-            return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
+            return getAnalysisProvider(Component.CHAR_FILTER, charFilters, charFilter, currentSettings.get("type"));
         } else {
             return getCharFilterProvider(charFilter);
         }
@@ -261,7 +271,40 @@ public final class AnalysisRegistry implements Closeable {
         };
     }

-    private <T> Map<String, T> buildMapping(boolean analyzer, String toBuild, IndexSettings settings, Map<String, Settings> settingsMap,
+    enum Component {
+        ANALYZER {
+            @Override
+            public String toString() {
+                return "analyzer";
+            }
+        },
+        NORMALIZER {
+            @Override
+            public String toString() {
+                return "normalizer";
+            }
+        },
+        CHAR_FILTER {
+            @Override
+            public String toString() {
+                return "char_filter";
+            }
+        },
+        TOKENIZER {
+            @Override
+            public String toString() {
+                return "tokenizer";
+            }
+        },
+        FILTER {
+            @Override
+            public String toString() {
+                return "filter";
+            }
+        };
+    }
+
+    private <T> Map<String, T> buildMapping(Component component, IndexSettings settings, Map<String, Settings> settingsMap,
                                             Map<String, AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, AnalysisModule.AnalysisProvider<T>> defaultInstance)
             throws IOException {
         Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, settings.getIndexVersionCreated()).build();
@@ -270,29 +313,34 @@ public final class AnalysisRegistry implements Closeable {
             String name = entry.getKey();
             Settings currentSettings = entry.getValue();
             String typeName = currentSettings.get("type");
-            if (analyzer) {
-                T factory;
+            if (component == Component.ANALYZER) {
+                T factory = null;
                 if (typeName == null) {
                     if (currentSettings.get("tokenizer") != null) {
                         factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
                     } else {
-                        throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
+                        throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
                     }
                 } else if (typeName.equals("custom")) {
                     factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
-                } else {
-                    AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
-                    if (type == null) {
-                        throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
-                    }
-                    factory = type.get(settings, environment, name, currentSettings);
                 }
-                factories.put(name, factory);
-            } else {
-                AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
-                final T factory = type.get(settings, environment, name, currentSettings);
-                factories.put(name, factory);
+                if (factory != null) {
+                    factories.put(name, factory);
+                    continue;
+                }
+            } else if (component == Component.NORMALIZER) {
+                if (typeName == null || typeName.equals("custom")) {
+                    T factory = (T) new CustomNormalizerProvider(settings, name, currentSettings);
+                    factories.put(name, factory);
+                    continue;
+                }
             }
+            AnalysisProvider<T> type = getAnalysisProvider(component, providerMap, name, typeName);
+            if (type == null) {
+                throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
+            }
+            final T factory = type.get(settings, environment, name, currentSettings);
+            factories.put(name, factory);
+
         }
         // go over the char filters in the bindings and register the ones that are not configured
@@ -330,13 +378,13 @@ public final class AnalysisRegistry implements Closeable {
         return factories;
     }

-    private <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
+    private <T> AnalysisProvider<T> getAnalysisProvider(Component component, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
         if (typeName == null) {
-            throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
+            throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
         }
         AnalysisProvider<T> type = providerMap.get(typeName);
         if (type == null) {
-            throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
+            throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
         }
         return type;
     }
@@ -426,6 +474,7 @@ public final class AnalysisRegistry implements Closeable {

     public IndexAnalyzers build(IndexSettings indexSettings,
                                 Map<String, AnalyzerProvider<?>> analyzerProviders,
+                                Map<String, AnalyzerProvider<?>> normalizerProviders,
                                 Map<String, TokenizerFactory> tokenizerFactoryFactories,
                                 Map<String, CharFilterFactory> charFilterFactoryFactories,
                                 Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {
@@ -436,10 +485,15 @@ public final class AnalysisRegistry implements Closeable {
         DeprecationLogger deprecationLogger = new DeprecationLogger(logger);
         Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
         Map<String, NamedAnalyzer> analyzers = new HashMap<>();
+        Map<String, NamedAnalyzer> normalizers = new HashMap<>();
         for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
             processAnalyzerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), analyzerAliases, analyzers,
                 tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories);
         }
+        for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
+            processNormalizerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), normalizers,
+                tokenFilterFactoryFactories, charFilterFactoryFactories);
+        }
         for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
             String key = entry.getKey();
             if (analyzers.containsKey(key) &&
@@ -485,7 +539,7 @@ public final class AnalysisRegistry implements Closeable {
             }
         }
         return new IndexAnalyzers(indexSettings, defaultIndexAnalyzer, defaultSearchAnalyzer, defaultSearchQuoteAnalyzer,
-            unmodifiableMap(analyzers));
+            unmodifiableMap(analyzers), unmodifiableMap(normalizers));
     }

     private void processAnalyzerFactory(DeprecationLogger deprecationLogger,
@@ -551,4 +605,25 @@ public final class AnalysisRegistry implements Closeable {
             }
         }
     }
+
+    private void processNormalizerFactory(DeprecationLogger deprecationLogger,
+                                          IndexSettings indexSettings,
+                                          String name,
+                                          AnalyzerProvider<?> normalizerFactory,
+                                          Map<String, NamedAnalyzer> normalizers,
+                                          Map<String, TokenFilterFactory> tokenFilters,
+                                          Map<String, CharFilterFactory> charFilters) {
+        if (normalizerFactory instanceof CustomNormalizerProvider) {
+            ((CustomNormalizerProvider) normalizerFactory).build(charFilters, tokenFilters);
+        }
+        Analyzer normalizerF = normalizerFactory.get();
+        if (normalizerF == null) {
+            throw new IllegalArgumentException("normalizer [" + normalizerFactory.name() + "] created null normalizer");
+        }
+        NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
+        if (normalizers.containsKey(name)) {
+            throw new IllegalStateException("already registered analyzer with name: " + name);
+        }
+        normalizers.put(name, normalizer);
+    }
 }
@@ -94,4 +94,27 @@ public final class CustomAnalyzer extends Analyzer {
         }
         return reader;
     }
+
+    @Override
+    protected Reader initReaderForNormalization(String fieldName, Reader reader) {
+        for (CharFilterFactory charFilter : charFilters) {
+            if (charFilter instanceof MultiTermAwareComponent) {
+                charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
+                reader = charFilter.create(reader);
+            }
+        }
+        return reader;
+    }
+
+    @Override
+    protected TokenStream normalize(String fieldName, TokenStream in) {
+        TokenStream result = in;
+        for (TokenFilterFactory filter : tokenFilters) {
+            if (filter instanceof MultiTermAwareComponent) {
+                filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
+                result = filter.create(result);
+            }
+        }
+        return result;
+    }
 }
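These two hooks are what make per-character normalization work: Lucene invokes initReaderForNormalization and normalize when a value is normalized outside full analysis (for example via Analyzer.normalize(String, String)), and only components that are MultiTermAwareComponent take part, which is why stemming filters are skipped. A minimal standalone sketch of the same idea in plain Lucene (not part of the diff; the package location of LowerCaseFilter varies across Lucene versions):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.util.BytesRef;

    // A toy normalizer-style analyzer: the whole value is one token
    // (KeywordTokenizer) and the only filter is character-level, so it is
    // safe to apply the same filter in normalize() as at index time.
    final class ToyNormalizer extends Analyzer {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new KeywordTokenizer();
            return new TokenStreamComponents(source, new LowerCaseFilter(source));
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new LowerCaseFilter(in); // same character-level filter as above
        }
    }

    // Usage: Analyzer.normalize(field, text) runs the normalize() chain:
    // BytesRef term = new ToyNormalizer().normalize("field", "FooBar"); // -> "foobar"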
new file (CustomNormalizerProvider.java):
@@ -0,0 +1,95 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A custom normalizer that is built out of char and token filters. On the
+ * contrary to analyzers, it does not support tokenizers and only supports a
+ * subset of char and token filters.
+ */
+public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvider<CustomAnalyzer> {
+
+    private final Settings analyzerSettings;
+
+    private CustomAnalyzer customAnalyzer;
+
+    public CustomNormalizerProvider(IndexSettings indexSettings,
+                                    String name, Settings settings) {
+        super(indexSettings, name, settings);
+        this.analyzerSettings = settings;
+    }
+
+    public void build(final Map<String, CharFilterFactory> charFilters, final Map<String, TokenFilterFactory> tokenFilters) {
+        String tokenizerName = analyzerSettings.get("tokenizer");
+        if (tokenizerName != null) {
+            throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer");
+        }
+
+        List<CharFilterFactory> charFiltersList = new ArrayList<>();
+        String[] charFilterNames = analyzerSettings.getAsArray("char_filter");
+        for (String charFilterName : charFilterNames) {
+            CharFilterFactory charFilter = charFilters.get(charFilterName);
+            if (charFilter == null) {
+                throw new IllegalArgumentException("Custom normalizer [" + name() + "] failed to find char_filter under name ["
+                        + charFilterName + "]");
+            }
+            if (charFilter instanceof MultiTermAwareComponent == false) {
+                throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use char filter ["
+                        + charFilterName + "]");
+            }
+            charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
+            charFiltersList.add(charFilter);
+        }
+
+        List<TokenFilterFactory> tokenFilterList = new ArrayList<>();
+        String[] tokenFilterNames = analyzerSettings.getAsArray("filter");
+        for (String tokenFilterName : tokenFilterNames) {
+            TokenFilterFactory tokenFilter = tokenFilters.get(tokenFilterName);
+            if (tokenFilter == null) {
+                throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find filter under name ["
+                        + tokenFilterName + "]");
+            }
+            if (tokenFilter instanceof MultiTermAwareComponent == false) {
+                throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use filter [" + tokenFilterName + "]");
+            }
+            tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter).getMultiTermComponent();
+            tokenFilterList.add(tokenFilter);
+        }
+
+        this.customAnalyzer = new CustomAnalyzer(
+                PreBuiltTokenizers.KEYWORD.getTokenizerFactory(indexSettings.getIndexVersionCreated()),
+                charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
+                tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])
+        );
+    }
+
+    @Override
+    public CustomAnalyzer get() {
+        return this.customAnalyzer;
+    }
+}
@@ -25,6 +25,7 @@ import org.elasticsearch.index.IndexSettings;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.Map;
+import java.util.stream.Stream;

 /**
  * IndexAnalyzers contains a name to analyzer mapping for a specific index.
@@ -38,15 +39,18 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable {
     private final NamedAnalyzer defaultSearchAnalyzer;
     private final NamedAnalyzer defaultSearchQuoteAnalyzer;
     private final Map<String, NamedAnalyzer> analyzers;
+    private final Map<String, NamedAnalyzer> normalizers;
     private final IndexSettings indexSettings;

     public IndexAnalyzers(IndexSettings indexSettings, NamedAnalyzer defaultIndexAnalyzer, NamedAnalyzer defaultSearchAnalyzer,
-                          NamedAnalyzer defaultSearchQuoteAnalyzer, Map<String, NamedAnalyzer> analyzers) {
+                          NamedAnalyzer defaultSearchQuoteAnalyzer, Map<String, NamedAnalyzer> analyzers,
+                          Map<String, NamedAnalyzer> normalizers) {
         super(indexSettings);
         this.defaultIndexAnalyzer = defaultIndexAnalyzer;
         this.defaultSearchAnalyzer = defaultSearchAnalyzer;
         this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer;
         this.analyzers = analyzers;
+        this.normalizers = normalizers;
         this.indexSettings = indexSettings;
     }

@@ -57,6 +61,12 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable {
         return analyzers.get(name);
     }

+    /**
+     * Returns a normalizer mapped to the given name or <code>null</code> if not present
+     */
+    public NamedAnalyzer getNormalizer(String name) {
+        return normalizers.get(name);
+    }
+
     /**
      * Returns the default index analyzer for this index
@@ -81,7 +91,7 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable {

     @Override
     public void close() throws IOException {
-        IOUtils.close(() -> analyzers.values().stream()
+        IOUtils.close(() -> Stream.concat(analyzers.values().stream(), normalizers.values().stream())
             .filter(a -> a.scope() == AnalyzerScope.INDEX)
             .iterator());
     }
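With this plumbing in place, a normalizer can be looked up and applied like any other named analyzer. A hedged usage sketch (the normalizer name and sample input come from CustomNormalizerTests further down; the IndexAnalyzers instance is assumed to be built from settings that define it):

    import org.apache.lucene.util.BytesRef;
    import org.elasticsearch.index.analysis.IndexAnalyzers;
    import org.elasticsearch.index.analysis.NamedAnalyzer;

    // Assumes "my_normalizer" is defined with lowercase + asciifolding filters.
    static BytesRef normalizeOrNull(IndexAnalyzers indexAnalyzers, String field, String text) {
        NamedAnalyzer normalizer = indexAnalyzers.getNormalizer("my_normalizer");
        // Analyzer.normalize(field, text) runs only the multi-term-aware
        // components: "Cet été-là" -> "cet ete-la"
        return normalizer == null ? null : normalizer.normalize(field, text);
    }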
@@ -19,16 +19,20 @@

 package org.elasticsearch.index.mapper;

+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;

@@ -36,6 +40,7 @@ import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;

 import static org.elasticsearch.index.mapper.TypeParsers.parseField;

@@ -70,6 +75,11 @@ public final class KeywordFieldMapper extends FieldMapper {
             builder = this;
         }

+        @Override
+        public KeywordFieldType fieldType() {
+            return (KeywordFieldType) super.fieldType();
+        }
+
         public Builder ignoreAbove(int ignoreAbove) {
             if (ignoreAbove < 0) {
                 throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
@@ -92,6 +102,12 @@ public final class KeywordFieldMapper extends FieldMapper {
             return builder;
         }

+        public Builder normalizer(NamedAnalyzer normalizer) {
+            fieldType().setNormalizer(normalizer);
+            fieldType().setSearchAnalyzer(normalizer);
+            return builder;
+        }
+
         @Override
         public KeywordFieldMapper build(BuilderContext context) {
             setupFieldType(context);
@@ -103,7 +119,7 @@ public final class KeywordFieldMapper extends FieldMapper {

     public static class TypeParser implements Mapper.TypeParser {
         @Override
-        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
+        public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
             KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder(name);
             parseField(builder, name, node, parserContext);
             for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
@@ -125,6 +141,15 @@ public final class KeywordFieldMapper extends FieldMapper {
                 } else if (propName.equals("eager_global_ordinals")) {
                     builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode));
                     iterator.remove();
+                } else if (propName.equals("normalizer")) {
+                    if (propNode != null) {
+                        NamedAnalyzer normalizer = parserContext.getIndexAnalyzers().getNormalizer(propNode.toString());
+                        if (normalizer == null) {
+                            throw new MapperParsingException("normalizer [" + propNode.toString() + "] not found for field [" + name + "]");
+                        }
+                        builder.normalizer(normalizer);
+                    }
+                    iterator.remove();
                 }
             }
             return builder;
@@ -133,21 +158,58 @@ public final class KeywordFieldMapper extends FieldMapper {

     public static final class KeywordFieldType extends StringFieldType {

-        public KeywordFieldType() {}
+        private NamedAnalyzer normalizer = null;
+
+        public KeywordFieldType() {
+            setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
+            setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
+        }

         protected KeywordFieldType(KeywordFieldType ref) {
             super(ref);
+            this.normalizer = ref.normalizer;
         }

         public KeywordFieldType clone() {
             return new KeywordFieldType(this);
         }

+        @Override
+        public boolean equals(Object o) {
+            if (super.equals(o) == false) {
+                return false;
+            }
+            return Objects.equals(normalizer, ((KeywordFieldType) o).normalizer);
+        }
+
+        @Override
+        public void checkCompatibility(MappedFieldType otherFT, List<String> conflicts, boolean strict) {
+            super.checkCompatibility(otherFT, conflicts, strict);
+            KeywordFieldType other = (KeywordFieldType) otherFT;
+            if (Objects.equals(normalizer, other.normalizer) == false) {
+                conflicts.add("mapper [" + name() + "] has different [normalizer]");
+            }
+        }
+
+        @Override
+        public int hashCode() {
+            return 31 * super.hashCode() + Objects.hashCode(normalizer);
+        }
+
         @Override
         public String typeName() {
             return CONTENT_TYPE;
         }

+        public NamedAnalyzer normalizer() {
+            return normalizer;
+        }
+
+        public void setNormalizer(NamedAnalyzer normalizer) {
+            checkIfFrozen();
+            this.normalizer = normalizer;
+        }
+
         @Override
         public Query nullValueQuery() {
             if (nullValue() == null) {
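Because the normalizer participates in equals, hashCode, and checkCompatibility, changing it on an existing field is rejected at mapping-merge time. An illustrative fragment (not from the commit; setName/freeze usage and the "myLowercase" NamedAnalyzer are assumptions for the sketch):

    // Two keyword field types that differ solely in their normalizer now
    // report a merge conflict.
    KeywordFieldMapper.KeywordFieldType a = new KeywordFieldMapper.KeywordFieldType();
    a.setName("code");
    a.setNormalizer(myLowercase); // myLowercase: some NamedAnalyzer on the index
    a.freeze();

    KeywordFieldMapper.KeywordFieldType b = new KeywordFieldMapper.KeywordFieldType();
    b.setName("code");
    b.freeze();

    List<String> conflicts = new ArrayList<>();
    a.checkCompatibility(b, conflicts, true);
    // conflicts now contains: "mapper [code] has different [normalizer]"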
@@ -171,13 +233,25 @@ public final class KeywordFieldMapper extends FieldMapper {
             BytesRef binaryValue = (BytesRef) value;
             return binaryValue.utf8ToString();
         }
+
+        @Override
+        protected BytesRef indexedValueForSearch(Object value) {
+            if (value == null) {
+                return null;
+            }
+            if (value instanceof BytesRef) {
+                value = ((BytesRef) value).utf8ToString();
+            }
+            return searchAnalyzer().normalize(name(), value.toString());
+        }
     }

     private Boolean includeInAll;
     private int ignoreAbove;

     protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
-                                 int ignoreAbove, Boolean includeInAll, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+                                 int ignoreAbove, Boolean includeInAll,
+                                 Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
         this.ignoreAbove = ignoreAbove;
@@ -196,6 +270,11 @@ public final class KeywordFieldMapper extends FieldMapper {
         return (KeywordFieldMapper) super.clone();
     }

+    @Override
+    public KeywordFieldType fieldType() {
+        return (KeywordFieldType) super.fieldType();
+    }
+
     // pkg-private for testing
     Boolean includeInAll() {
         return includeInAll;
@@ -203,7 +282,7 @@ public final class KeywordFieldMapper extends FieldMapper {

     @Override
     protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
-        final String value;
+        String value;
         if (context.externalValueSet()) {
             value = context.externalValue().toString();
         } else {
|
|||
return;
|
||||
}
|
||||
|
||||
final NamedAnalyzer normalizer = fieldType().normalizer();
|
||||
if (normalizer != null) {
|
||||
try (final TokenStream ts = normalizer.tokenStream(name(), value)) {
|
||||
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
ts.reset();
|
||||
if (ts.incrementToken() == false) {
|
||||
throw new IllegalStateException("The normalization token stream is "
|
||||
+ "expected to produce exactly 1 token, but got 0 for analyzer "
|
||||
+ normalizer + " and input \"" + value + "\"");
|
||||
}
|
||||
final String newValue = termAtt.toString();
|
||||
if (ts.incrementToken()) {
|
||||
throw new IllegalStateException("The normalization token stream is "
|
||||
+ "expected to produce exactly 1 token, but got 2+ for analyzer "
|
||||
+ normalizer + " and input \"" + value + "\"");
|
||||
}
|
||||
ts.end();
|
||||
value = newValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (context.includeInAll(includeInAll, this)) {
|
||||
context.allEntries().addText(fieldType().name(), value, fieldType().boost());
|
||||
}
|
||||
|
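At index time the normalizer is applied by hand: the mapper runs the value through the analyzer's token stream and insists on exactly one token. The same consume-one-token pattern, extracted into a standalone helper (a sketch using only plain Lucene APIs; it works with any Analyzer whose chain keeps the value as a single token, such as the ToyNormalizer above):

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class SingleTokenNormalizer {
        // Runs `value` through `analyzer` and returns the single resulting
        // token, failing loudly if the chain splits or swallows the input.
        static String normalize(Analyzer analyzer, String field, String value) throws IOException {
            try (TokenStream ts = analyzer.tokenStream(field, value)) {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                if (ts.incrementToken() == false) {
                    throw new IllegalStateException("expected exactly 1 token, got 0");
                }
                String normalized = termAtt.toString();
                if (ts.incrementToken()) {
                    throw new IllegalStateException("expected exactly 1 token, got 2+");
                }
                ts.end();
                return normalized;
            }
        }
    }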
@@ -263,5 +363,11 @@ public final class KeywordFieldMapper extends FieldMapper {
         if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
             builder.field("ignore_above", ignoreAbove);
         }
+
+        if (fieldType().normalizer() != null) {
+            builder.field("normalizer", fieldType().normalizer().name());
+        } else if (includeDefaults) {
+            builder.nullField("normalizer");
+        }
     }
 }
@@ -170,8 +170,9 @@ public final class AnalysisModule {
         NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService);
         NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
         NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
+        NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins);
         analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
-            .getRegistry(), analyzers.getRegistry());
+            .getRegistry(), analyzers.getRegistry(), normalizers.getRegistry());
     }

     HunspellService getHunspellService() {
@@ -334,6 +335,13 @@ public final class AnalysisModule {
         return analyzers;
     }

+    private NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> setupNormalizers(List<AnalysisPlugin> plugins) {
+        NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = new NamedRegistry<>("normalizer");
+        // TODO: provide built-in normalizer providers?
+        // TODO: pluggability?
+        return normalizers;
+    }
+
     private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
         return new AnalysisModule.AnalysisProvider<T>() {
             @Override
@@ -435,7 +435,7 @@ public class GatewayIndexStateIT extends ESIntegTestCase {
         assertEquals(ex.getMessage(), "Failed to verify index " + metaData.getIndex());
         assertNotNull(ex.getCause());
         assertEquals(IllegalArgumentException.class, ex.getCause().getClass());
-        assertEquals(ex.getCause().getMessage(), "Unknown tokenfilter type [icu_collation] for [myCollator]");
+        assertEquals(ex.getCause().getMessage(), "Unknown filter type [icu_collation] for [myCollator]");
     }

     /**
@@ -148,7 +148,7 @@ public class IndexModuleTests extends ESTestCase {

     public void testWrapperIsBound() throws IOException {
         IndexModule module = new IndexModule(indexSettings,
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.setSearcherWrapper((s) -> new Wrapper());
         module.engineFactory.set(new MockEngineFactory(AssertingDirectoryReader.class));

@@ -168,7 +168,7 @@ public class IndexModuleTests extends ESTestCase {
             .build();
         IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
         IndexModule module = new IndexModule(indexSettings,
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.addIndexStore("foo_store", FooStore::new);
         try {
             module.addIndexStore("foo_store", FooStore::new);

@@ -193,7 +193,7 @@ public class IndexModuleTests extends ESTestCase {
         };
         IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
         IndexModule module = new IndexModule(indexSettings,
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.addIndexEventListener(eventListener);
         IndexService indexService = newIndexService(module);
         IndexSettings x = indexService.getIndexSettings();

@@ -208,7 +208,7 @@ public class IndexModuleTests extends ESTestCase {
     public void testListener() throws IOException {
         Setting<Boolean> booleanSetting = Setting.boolSetting("index.foo.bar", false, Property.Dynamic, Property.IndexScope);
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings, booleanSetting),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         Setting<Boolean> booleanSetting2 = Setting.boolSetting("index.foo.bar.baz", false, Property.Dynamic, Property.IndexScope);
         AtomicBoolean atomicBoolean = new AtomicBoolean(false);
         module.addSettingsUpdateConsumer(booleanSetting, atomicBoolean::set);

@@ -228,7 +228,7 @@ public class IndexModuleTests extends ESTestCase {

     public void testAddIndexOperationListener() throws IOException {
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         AtomicBoolean executed = new AtomicBoolean(false);
         IndexingOperationListener listener = new IndexingOperationListener() {
             @Override

@@ -257,7 +257,7 @@ public class IndexModuleTests extends ESTestCase {

     public void testAddSearchOperationListener() throws IOException {
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         AtomicBoolean executed = new AtomicBoolean(false);
         SearchOperationListener listener = new SearchOperationListener() {

@@ -291,7 +291,7 @@ public class IndexModuleTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .build();
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.addSimilarity("test_similarity", (string, settings) -> new SimilarityProvider() {
             @Override
             public String name() {

@@ -315,7 +315,7 @@ public class IndexModuleTests extends ESTestCase {

     public void testFrozen() {
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.freeze();
         String msg = "Can't modify IndexModule once the index service has been created";
         assertEquals(msg, expectThrows(IllegalStateException.class, () -> module.addSearchOperationListener(null)).getMessage());

@@ -334,7 +334,7 @@ public class IndexModuleTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .build();
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
         assertEquals("Unknown Similarity type [test_similarity] for [my_similarity]", ex.getMessage());
     }

@@ -346,7 +346,7 @@ public class IndexModuleTests extends ESTestCase {
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
             .build();
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
         assertEquals("Similarity [my_similarity] must have an associated type", ex.getMessage());
     }

@@ -356,7 +356,7 @@ public class IndexModuleTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
         expectThrows(AlreadySetException.class, () -> module.forceQueryCacheProvider((a, b) -> new CustomQueryCache()));
         IndexService indexService = newIndexService(module);

@@ -369,7 +369,7 @@ public class IndexModuleTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         IndexService indexService = newIndexService(module);
         assertTrue(indexService.cache().query() instanceof IndexQueryCache);
         indexService.close("simon says", false);

@@ -381,7 +381,7 @@ public class IndexModuleTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
-            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
         IndexService indexService = newIndexService(module);
         assertTrue(indexService.cache().query() instanceof DisabledQueryCache);
@@ -65,7 +65,7 @@ public class AnalysisRegistryTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .build();
         registry = new AnalysisRegistry(new Environment(settings),
-            emptyMap(), emptyMap(), emptyMap(), emptyMap());
+            emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
     }

     public void testDefaultAnalyzers() throws IOException {

@@ -76,7 +76,8 @@ public class AnalysisRegistryTests extends ESTestCase {
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
-        IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap())
+        IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
+            emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
             .build(idxSettings);
         assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));

@@ -88,7 +89,7 @@ public class AnalysisRegistryTests extends ESTestCase {
         Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
         IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
             singletonMap("default", analyzerProvider("default"))
-            , emptyMap(), emptyMap(), emptyMap());
+            , emptyMap(), emptyMap(), emptyMap(), emptyMap());
         assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));

@@ -100,7 +101,7 @@ public class AnalysisRegistryTests extends ESTestCase {
         AnalyzerProvider<?> defaultIndex = new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer());
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
             () -> registry.build(IndexSettingsModule.newIndexSettings("index", settings),
-                singletonMap("default_index", defaultIndex), emptyMap(), emptyMap(), emptyMap()));
+                singletonMap("default_index", defaultIndex), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
         assertTrue(e.getMessage().contains("[index.analysis.analyzer.default_index] is not supported"));
     }

@@ -109,7 +110,7 @@ public class AnalysisRegistryTests extends ESTestCase {
             VersionUtils.getPreviousVersion(Version.V_5_0_0_alpha1));
         Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
         IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
-            singletonMap("default_index", analyzerProvider("default_index")), emptyMap(), emptyMap(), emptyMap());
+            singletonMap("default_index", analyzerProvider("default_index")), emptyMap(), emptyMap(), emptyMap(), emptyMap());
         assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));

@@ -121,7 +122,7 @@ public class AnalysisRegistryTests extends ESTestCase {
         Version version = VersionUtils.randomVersion(random());
         Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
         IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
-            singletonMap("default_search", analyzerProvider("default_search")), emptyMap(), emptyMap(), emptyMap());
+            singletonMap("default_search", analyzerProvider("default_search")), emptyMap(), emptyMap(), emptyMap(), emptyMap());
         assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));

@@ -135,7 +136,7 @@ public class AnalysisRegistryTests extends ESTestCase {
         analyzers.put("default_index", analyzerProvider("default_index"));
         analyzers.put("default_search", analyzerProvider("default_search"));
         IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
-            analyzers, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
+            analyzers, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
         assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
         assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));

@@ -196,10 +197,11 @@ public class AnalysisRegistryTests extends ESTestCase {
         Settings indexSettings = Settings.builder()
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
-        IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap())
+        IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
+            emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
             .build(idxSettings);
         IndexAnalyzers otherIndexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(),
-            emptyMap()).build(idxSettings);
+            emptyMap(), emptyMap()).build(idxSettings);
         final int numIters = randomIntBetween(5, 20);
         for (int i = 0; i < numIters; i++) {
             PreBuiltAnalyzers preBuiltAnalyzers = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values());

@@ -219,7 +221,8 @@ public class AnalysisRegistryTests extends ESTestCase {
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);

         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap()).build(idxSettings));
+            () -> new AnalysisRegistry(new Environment(settings),
+                emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()).build(idxSettings));
         assertThat(e.getMessage(), equalTo("analyzer [test_analyzer] must specify either an analyzer type, or a tokenizer"));
     }

@@ -228,7 +231,8 @@ public class AnalysisRegistryTests extends ESTestCase {
         Settings indexSettings = Settings.builder()
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
-        IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap())
+        IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
+            emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
            .build(idxSettings);
         indexAnalyzers.close();
         indexAnalyzers.close();
new file (CustomNormalizerTests.java):
@@ -0,0 +1,102 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.ESTokenStreamTestCase;
+
+import java.io.IOException;
+
+public class CustomNormalizerTests extends ESTokenStreamTestCase {
+
+    public void testBasics() throws IOException {
+        Settings settings = Settings.builder()
+                .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding")
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+        assertNull(analysis.indexAnalyzers.get("my_normalizer"));
+        NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
+        assertNotNull(normalizer);
+        assertEquals("my_normalizer", normalizer.name());
+        assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet ete-la"});
+        assertEquals(new BytesRef("cet ete-la"), normalizer.normalize("foo", "Cet été-là"));
+    }
+
+    public void testUnknownType() {
+        Settings settings = Settings.builder()
+                .put("index.analysis.normalizer.my_normalizer.type", "foobar")
+                .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding")
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
+        assertEquals("Unknown normalizer type [foobar] for [my_normalizer]", e.getMessage());
+    }
+
+    public void testTokenizer() throws IOException {
+        Settings settings = Settings.builder()
+                .put("index.analysis.normalizer.my_normalizer.tokenizer", "keyword")
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
+        assertEquals("Custom normalizer [my_normalizer] cannot configure a tokenizer", e.getMessage());
+    }
+
+    public void testCharFilters() throws IOException {
+        Settings settings = Settings.builder()
+                .put("index.analysis.char_filter.my_mapping.type", "mapping")
+                .putArray("index.analysis.char_filter.my_mapping.mappings", "a => z")
+                .putArray("index.analysis.normalizer.my_normalizer.char_filter", "my_mapping")
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+        assertNull(analysis.indexAnalyzers.get("my_normalizer"));
+        NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
+        assertNotNull(normalizer);
+        assertEquals("my_normalizer", normalizer.name());
+        assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] {"zbc"});
+        assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
+    }
+
+    public void testIllegalFilters() throws IOException {
+        Settings settings = Settings.builder()
+                .putArray("index.analysis.normalizer.my_normalizer.filter", "porter_stem")
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
+        assertEquals("Custom normalizer [my_normalizer] may not use filter [porter_stem]", e.getMessage());
+    }
+
+    public void testIllegalCharFilters() throws IOException {
+        Settings settings = Settings.builder()
+                .putArray("index.analysis.normalizer.my_normalizer.char_filter", "html_strip")
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
+        assertEquals("Custom normalizer [my_normalizer] may not use char filter [html_strip]", e.getMessage());
+    }
+}
@@ -2313,7 +2313,7 @@ public class InternalEngineTests extends ESTestCase {
        Index index = new Index(indexName, "_na_");
        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
        NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
        IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, Collections.emptyMap());
        IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, Collections.emptyMap(), Collections.emptyMap());
        SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
        MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
        mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,
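The constructor change above recurs at every call site in this diff: `IndexAnalyzers` now takes a dedicated normalizers map alongside the analyzers map, and normalizers are resolved through their own accessor. A hedged sketch of the resulting call shape, with the signature inferred from the call sites rather than quoted from the class itself:

Map<String, NamedAnalyzer> analyzerMap = Collections.emptyMap();   // from index.analysis.analyzer.*
Map<String, NamedAnalyzer> normalizerMap = Collections.emptyMap(); // from index.analysis.normalizer.*
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings,
        defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, analyzerMap, normalizerMap);
// looked up independently of regular analyzers; like get(), this is expected
// to return null for names that were never defined
NamedAnalyzer normalizer = indexAnalyzers.getNormalizer("my_normalizer");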
@@ -25,8 +25,10 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
@@ -51,7 +53,11 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {

    @Before
    public void setup() {
        indexService = createIndex("test");
        indexService = createIndex("test", Settings.builder()
                .put("index.analysis.normalizer.my_lowercase.type", "custom")
                .putArray("index.analysis.normalizer.my_lowercase.filter", "lowercase")
                .put("index.analysis.normalizer.my_asciifolding.type", "custom")
                .putArray("index.analysis.normalizer.my_asciifolding.filter", "asciifolding").build());
        parser = indexService.mapperService().documentMapperParser();
    }
@@ -283,6 +289,62 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
        assertFalse(fields[0].fieldType().omitNorms());
    }

    public void testNormalizer() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "keyword").field("normalizer", "my_lowercase").endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", "AbC")
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        // the normalized value is written twice: an indexed field and a doc-values field
        assertEquals(2, fields.length);

        // indexed field: lowercased by the normalizer, untokenized, no norms
        assertEquals(new BytesRef("abc"), fields[0].binaryValue());
        IndexableFieldType fieldType = fields[0].fieldType();
        assertThat(fieldType.omitNorms(), equalTo(true));
        assertFalse(fieldType.tokenized());
        assertFalse(fieldType.stored());
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
        assertThat(fieldType.storeTermVectors(), equalTo(false));
        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
        assertEquals(DocValuesType.NONE, fieldType.docValuesType());

        // doc-values field: the same normalized bytes, stored as SORTED_SET
        assertEquals(new BytesRef("abc"), fields[1].binaryValue());
        fieldType = fields[1].fieldType();
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
        assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
    }

    public void testUpdateNormalizer() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "keyword").field("normalizer", "my_lowercase").endObject().endObject()
                .endObject().endObject().string();
        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, randomBoolean());

        String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "keyword").field("normalizer", "my_asciifolding").endObject().endObject()
                .endObject().endObject().string();
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                () -> indexService.mapperService().merge("type",
                        new CompressedXContent(mapping2), MergeReason.MAPPING_UPDATE, randomBoolean()));
        assertEquals(
                "Mapper for [field] conflicts with existing mapping in other types:\n[mapper [field] has different [normalizer]]",
                e.getMessage());
    }

    public void testEmptyName() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject()
                .startObject("type")
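Behind `testNormalizer`, the mapper's job reduces to: run the raw value through the normalizer once, then emit the normalized bytes both as an indexed field and as sorted-set doc values. The following is a hedged sketch of that flow, not `KeywordFieldMapper`'s actual code; the method and parameter names are illustrative.

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;

static List<IndexableField> indexKeyword(Analyzer normalizer, String field,
                                         String value, FieldType fieldType) {
    // normalize once, e.g. "AbC" -> "abc" with a lowercase normalizer;
    // the _source keeps the original "AbC" untouched
    BytesRef normalized = normalizer.normalize(field, value);
    List<IndexableField> fields = new ArrayList<>();
    fields.add(new Field(field, normalized, fieldType));        // inverted index
    fields.add(new SortedSetDocValuesField(field, normalized)); // doc values
    return fields;
}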
@@ -20,22 +20,41 @@ package org.elasticsearch.index.mapper;

import com.carrotsearch.randomizedtesting.generators.RandomStrings;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
import org.junit.Before;

import java.io.IOException;
import java.util.Arrays;

public class KeywordFieldTypeTests extends FieldTypeTestCase {

    @Before
    public void setupProperties() {
        addModifier(new Modifier("normalizer", false) {
            @Override
            public void modify(MappedFieldType ft) {
                ((KeywordFieldType) ft).setNormalizer(Lucene.KEYWORD_ANALYZER);
            }
        });
    }

    @Override
    protected MappedFieldType createDefaultFieldType() {
        return new KeywordFieldMapper.KeywordFieldType();
@@ -62,6 +81,31 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
    }

    public void testTermQueryWithNormalizer() {
        MappedFieldType ft = createDefaultFieldType();
        ft.setName("field");
        ft.setIndexOptions(IndexOptions.DOCS);
        Analyzer normalizer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer in = new WhitespaceTokenizer();
                TokenFilter out = new LowerCaseFilter(in);
                return new TokenStreamComponents(in, out);
            }
            @Override
            protected TokenStream normalize(String fieldName, TokenStream in) {
                // the normalization chain only lowercases; it never splits tokens
                return new LowerCaseFilter(in);
            }
        };
        ft.setSearchAnalyzer(new NamedAnalyzer("my_normalizer", AnalyzerScope.INDEX, normalizer));
        assertEquals(new TermQuery(new Term("field", "foo bar")), ft.termQuery("fOo BaR", null));

        ft.setIndexOptions(IndexOptions.NONE);
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                () -> ft.termQuery("bar", null));
        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
    }

    public void testTermsQuery() {
        MappedFieldType ft = createDefaultFieldType();
        ft.setName("field");
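This test documents the search-time half of the feature: `termQuery` runs the query text through the field's search analyzer, which for normalized keyword fields is the normalizer itself, before building the term. Inferred from the assertions above rather than quoted from the implementation, the first assertion is equivalent to:

// "fOo BaR" is normalized, not tokenized: the whitespace survives, the case does not
BytesRef term = ft.searchAnalyzer().normalize("field", "fOo BaR"); // -> "foo bar"
assertEquals(new TermQuery(new Term("field", term)), ft.termQuery("fOo BaR", null));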
@@ -101,7 +101,7 @@ public class ParentFieldMapperTests extends ESSingleNodeTestCase {
        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, Settings.EMPTY);
        NamedAnalyzer namedAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
        IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, namedAnalyzer, namedAnalyzer, namedAnalyzer,
                Collections.emptyMap());
                Collections.emptyMap(), Collections.emptyMap());
        SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
        MapperService mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry(), similarityService,
                new IndicesModule(emptyList()).getMapperRegistry(), () -> null);
@@ -112,6 +112,8 @@ include::analysis/testing.asciidoc[]

include::analysis/analyzers.asciidoc[]

include::analysis/normalizers.asciidoc[]

include::analysis/tokenizers.asciidoc[]

include::analysis/tokenfilters.asciidoc[]
@@ -0,0 +1,57 @@
[[analysis-normalizers]]
== Normalizers

experimental[]

Normalizers are similar to analyzers except that they may only emit a single
token. As a consequence, they do not have a tokenizer and only accept a subset
of the available char filters and token filters. Only the filters that work on
a per-character basis are allowed. For instance a lowercasing filter would be
allowed, but not a stemming filter, which needs to look at the keyword as a
whole.

[float]
=== Custom normalizers

Elasticsearch does not ship with built-in normalizers so far, so the only way
to get one is by building a custom one. Custom normalizers take a list of
<<analysis-charfilters,character filters>> and a list of
<<analysis-tokenfilters,token filters>>.

[source,js]
--------------------------------
PUT index
{
  "settings": {
    "analysis": {
      "char_filter": {
        "quote": {
          "type": "mapping",
          "mappings": [
            "« => \"",
            "» => \""
          ]
        }
      },
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "char_filter": ["quote"],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "type": {
      "properties": {
        "foo": {
          "type": "keyword",
          "normalizer": "my_normalizer"
        }
      }
    }
  }
}
--------------------------------
// CONSOLE
@@ -8,6 +8,7 @@ parameters that are used by <<mapping-types,field mappings>>:

The following mapping parameters are common to some or all field datatypes:

* <<analyzer,`analyzer`>>
* <<normalizer,`normalizer`>>
* <<mapping-boost,`boost`>>
* <<coerce,`coerce`>>
* <<copy-to,`copy_to`>>

@@ -34,6 +35,8 @@ The following mapping parameters are common to some or all field datatypes:

include::params/analyzer.asciidoc[]

include::params/normalizer.asciidoc[]

include::params/boost.asciidoc[]

include::params/coerce.asciidoc[]
@@ -0,0 +1,163 @@
[[normalizer]]
=== `normalizer`

The `normalizer` property of <<keyword,`keyword`>> fields is similar to
<<analyzer,`analyzer`>> except that it guarantees that the analysis chain
produces a single token.

The `normalizer` is applied prior to indexing the keyword, as well as at
search-time when the `keyword` field is searched via a query parser such as
the <<query-dsl-match-query,`match`>> query.

[source,js]
--------------------------------
PUT index
{
  "settings": {
    "analysis": {
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "char_filter": [],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "type": {
      "properties": {
        "foo": {
          "type": "keyword",
          "normalizer": "my_normalizer"
        }
      }
    }
  }
}

PUT index/type/1
{
  "foo": "BÀR"
}

PUT index/type/2
{
  "foo": "bar"
}

PUT index/type/3
{
  "foo": "baz"
}

POST index/_refresh

GET index/_search
{
  "query": {
    "match": {
      "foo": "BAR"
    }
  }
}
--------------------------------
// CONSOLE

The above query matches documents 1 and 2 since `BÀR` is converted to `bar` at
both index and query time.

[source,js]
----------------------------
{
  "took": $body.took,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "index",
        "_type": "type",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "foo": "bar"
        }
      },
      {
        "_index": "index",
        "_type": "type",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "foo": "BÀR"
        }
      }
    ]
  }
}
----------------------------
// TESTRESPONSE[s/"took".*/"took": "$body.took",/]

The fact that keywords are converted prior to indexing also means that
aggregations return normalized values:

[source,js]
----------------------------
GET index/_search
{
  "size": 0,
  "aggs": {
    "foo_terms": {
      "terms": {
        "field": "foo"
      }
    }
  }
}
----------------------------
// CONSOLE
// TEST[continued]

returns

[source,js]
----------------------------
{
  "took": 43,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0.0,
    "hits": []
  },
  "aggregations": {
    "foo_terms": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "bar",
          "doc_count": 2
        },
        {
          "key": "baz",
          "doc_count": 1
        }
      ]
    }
  }
}
----------------------------
// TESTRESPONSE[s/"took".*/"took": "$body.took",/]
@@ -109,6 +109,12 @@ The following parameters are accepted by `keyword` fields:

    Which scoring algorithm or _similarity_ should be used. Defaults
    to `classic`, which uses TF/IDF.

<<normalizer,`normalizer`>>::

    experimental[]
    How to pre-process the keyword prior to indexing. Defaults to `null`,
    meaning the keyword is kept as-is.

NOTE: Indexes imported from 2.x do not support `keyword`. Instead they will
attempt to downgrade `keyword` into `string`. This allows you to merge modern
mappings with legacy mappings. Long-lived indexes will have to be recreated