Add an option to split keyword field on whitespace at query time (#30691)
This change adds an option named `split_queries_on_whitespace` to the `keyword` field type. When set to `true`, full text queries (`match`, `multi_match`, `query_string`, ...) that target the field split the input on whitespace to build the query terms. It defaults to `false`. Closes #30393
commit 0791f93dbd
parent cea3c28b5b
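To make the behaviour concrete: with the option disabled, a keyword field's search analyzer emits the whole query input as a single term; with it enabled, the input is tokenized on whitespace. A minimal sketch of the difference at the Lucene level, using only stock analyzers (the `SplitDemo` class and its method are illustrative, not part of this change):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class SplitDemo {
    // Print the tokens an analyzer produces for the given input.
    static void printTokens(Analyzer analyzer, String input) throws Exception {
        try (TokenStream ts = analyzer.tokenStream("field", input)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println("  [" + term + "]");
            }
            ts.end();
        }
    }

    public static void main(String[] args) throws Exception {
        // Default keyword behaviour: the whole input becomes one term.
        printTokens(new KeywordAnalyzer(), "Quick Foxes");    // [Quick Foxes]
        // With split_queries_on_whitespace=true: one term per whitespace chunk.
        printTokens(new WhitespaceAnalyzer(), "Quick Foxes"); // [Quick] [Foxes]
    }
}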
@@ -103,6 +103,12 @@ The following parameters are accepted by `keyword` fields:
     How to pre-process the keyword prior to indexing. Defaults to `null`,
     meaning the keyword is kept as-is.
 
+`split_queries_on_whitespace`::
+
+    Whether <<full-text-queries,full text queries>> should split the input on whitespace
+    when building a query for this field.
+    Accepts `true` or `false` (default).
+
 NOTE: Indexes imported from 2.x do not support `keyword`. Instead they will
 attempt to downgrade `keyword` into `string`. This allows you to merge modern
 mappings with legacy mappings. Long lived indexes will have to be recreated
@@ -186,7 +186,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
                 return Collections.emptySet();
             }
         };
-        try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap)) {
+        try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap, analyzerMap)) {
             MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalzyers, xContentRegistry, similarityService,
                 mapperRegistry, () -> null);
             mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY);
@@ -19,6 +19,7 @@
 package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.Version;
@@ -453,13 +454,16 @@ public final class AnalysisRegistry implements Closeable {
         analyzerProviders = new HashMap<>(analyzerProviders);
         Map<String, NamedAnalyzer> analyzers = new HashMap<>();
         Map<String, NamedAnalyzer> normalizers = new HashMap<>();
+        Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap<>();
         for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
             processAnalyzerFactory(indexSettings, entry.getKey(), entry.getValue(), analyzers,
                 tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories);
         }
         for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
             processNormalizerFactory(entry.getKey(), entry.getValue(), normalizers,
-                tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories);
+                "keyword", tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories);
+            processNormalizerFactory(entry.getKey(), entry.getValue(), whitespaceNormalizers,
+                "whitespace", () -> new WhitespaceTokenizer(), tokenFilterFactoryFactories, charFilterFactoryFactories);
         }
 
         if (!analyzers.containsKey("default")) {
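The hunk above registers, for every configured normalizer, a second variant that keeps the same char and token filter chain but swaps the keyword tokenizer for a whitespace tokenizer. A rough standalone sketch of that pairing, using plain Lucene building blocks instead of the Elasticsearch factory types (lowercasing stands in for whatever filters the normalizer actually configures):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

import java.util.function.Supplier;

final class NormalizerVariants {
    // Build a "normalizer": the supplied tokenizer plus the shared filter chain
    // (lowercasing here is a stand-in for the configured filters).
    static Analyzer normalizer(Supplier<Tokenizer> tokenizer) {
        return new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer source = tokenizer.get();
                return new TokenStreamComponents(source, new LowerCaseFilter(source));
            }
        };
    }

    public static void main(String[] args) {
        // The registry keeps both variants under the same normalizer name:
        Analyzer keywordVariant = normalizer(KeywordTokenizer::new);       // whole-string terms
        Analyzer whitespaceVariant = normalizer(WhitespaceTokenizer::new); // whitespace-split terms
    }
}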
@@ -489,7 +493,7 @@ public final class AnalysisRegistry implements Closeable {
             }
         }
         return new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultSearchAnalyzer, defaultSearchQuoteAnalyzer,
-            unmodifiableMap(analyzers), unmodifiableMap(normalizers));
+            unmodifiableMap(analyzers), unmodifiableMap(normalizers), unmodifiableMap(whitespaceNormalizers));
     }
 
     private void processAnalyzerFactory(IndexSettings indexSettings,
@@ -545,15 +549,16 @@ public final class AnalysisRegistry implements Closeable {
                                          String name,
                                          AnalyzerProvider<?> normalizerFactory,
                                          Map<String, NamedAnalyzer> normalizers,
-                                         TokenizerFactory keywordTokenizerFactory,
+                                         String tokenizerName,
+                                         TokenizerFactory tokenizerFactory,
                                          Map<String, TokenFilterFactory> tokenFilters,
                                          Map<String, CharFilterFactory> charFilters) {
-        if (keywordTokenizerFactory == null) {
+        if (tokenizerFactory == null) {
             throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module");
         }
 
         if (normalizerFactory instanceof CustomNormalizerProvider) {
-            ((CustomNormalizerProvider) normalizerFactory).build(keywordTokenizerFactory, charFilters, tokenFilters);
+            ((CustomNormalizerProvider) normalizerFactory).build(tokenizerName, tokenizerFactory, charFilters, tokenFilters);
         }
         Analyzer normalizerF = normalizerFactory.get();
         if (normalizerF == null) {
@@ -38,15 +38,14 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvider
     private CustomAnalyzer customAnalyzer;
 
     public CustomNormalizerProvider(IndexSettings indexSettings,
                                     String name, Settings settings) {
         super(indexSettings, name, settings);
         this.analyzerSettings = settings;
     }
 
-    public void build(final TokenizerFactory keywordTokenizerFactory, final Map<String, CharFilterFactory> charFilters,
+    public void build(final String tokenizerName, final TokenizerFactory tokenizerFactory, final Map<String, CharFilterFactory> charFilters,
                       final Map<String, TokenFilterFactory> tokenFilters) {
-        String tokenizerName = analyzerSettings.get("tokenizer");
-        if (tokenizerName != null) {
+        if (analyzerSettings.get("tokenizer") != null) {
             throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer");
         }
 
@@ -82,8 +81,8 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvider
         }
 
         this.customAnalyzer = new CustomAnalyzer(
-            "keyword",
-            keywordTokenizerFactory,
+            tokenizerName,
+            tokenizerFactory,
             charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
             tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])
         );
@@ -40,11 +40,12 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable {
     private final NamedAnalyzer defaultSearchQuoteAnalyzer;
     private final Map<String, NamedAnalyzer> analyzers;
    private final Map<String, NamedAnalyzer> normalizers;
+    private final Map<String, NamedAnalyzer> whitespaceNormalizers;
     private final IndexSettings indexSettings;
 
     public IndexAnalyzers(IndexSettings indexSettings, NamedAnalyzer defaultIndexAnalyzer, NamedAnalyzer defaultSearchAnalyzer,
                           NamedAnalyzer defaultSearchQuoteAnalyzer, Map<String, NamedAnalyzer> analyzers,
-                          Map<String, NamedAnalyzer> normalizers) {
+                          Map<String, NamedAnalyzer> normalizers, Map<String, NamedAnalyzer> whitespaceNormalizers) {
         super(indexSettings);
         if (defaultIndexAnalyzer.name().equals("default") == false) {
             throw new IllegalStateException("default analyzer must have the name [default] but was: [" + defaultIndexAnalyzer.name() + "]");
@@ -54,6 +55,7 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable {
         this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer;
         this.analyzers = analyzers;
         this.normalizers = normalizers;
+        this.whitespaceNormalizers = whitespaceNormalizers;
         this.indexSettings = indexSettings;
     }
 
@@ -71,6 +73,13 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable {
         return normalizers.get(name);
     }
 
+    /**
+     * Returns a normalizer that splits on whitespace mapped to the given name or <code>null</code> if not present
+     */
+    public NamedAnalyzer getWhitespaceNormalizer(String name) {
+        return whitespaceNormalizers.get(name);
+    }
+
     /**
      * Returns the default index analyzer for this index
      */
@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedSetDocValuesField;
@@ -35,6 +36,8 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.analysis.AnalyzerScope;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
|
||||||
|
|
||||||
protected String nullValue = Defaults.NULL_VALUE;
|
protected String nullValue = Defaults.NULL_VALUE;
|
||||||
protected int ignoreAbove = Defaults.IGNORE_ABOVE;
|
protected int ignoreAbove = Defaults.IGNORE_ABOVE;
|
||||||
|
private IndexAnalyzers indexAnalyzers;
|
||||||
|
private String normalizerName;
|
||||||
|
|
||||||
public Builder(String name) {
|
public Builder(String name) {
|
||||||
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
|
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
|
||||||
|
@@ -106,15 +111,36 @@ public final class KeywordFieldMapper extends FieldMapper {
             return builder;
         }
 
-        public Builder normalizer(NamedAnalyzer normalizer) {
-            fieldType().setNormalizer(normalizer);
-            fieldType().setSearchAnalyzer(normalizer);
+        public Builder splitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) {
+            fieldType().setSplitQueriesOnWhitespace(splitQueriesOnWhitespace);
+            return builder;
+        }
+
+        public Builder normalizer(IndexAnalyzers indexAnalyzers, String name) {
+            this.indexAnalyzers = indexAnalyzers;
+            this.normalizerName = name;
             return builder;
         }
 
         @Override
         public KeywordFieldMapper build(BuilderContext context) {
             setupFieldType(context);
+            if (normalizerName != null) {
+                NamedAnalyzer normalizer = indexAnalyzers.getNormalizer(normalizerName);
+                if (normalizer == null) {
+                    throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]");
+                }
+                fieldType().setNormalizer(normalizer);
+                final NamedAnalyzer searchAnalyzer;
+                if (fieldType().splitQueriesOnWhitespace) {
+                    searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName);
+                } else {
+                    searchAnalyzer = normalizer;
+                }
+                fieldType().setSearchAnalyzer(searchAnalyzer);
+            } else if (fieldType().splitQueriesOnWhitespace) {
+                fieldType().setSearchAnalyzer(new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()));
+            }
             return new KeywordFieldMapper(
                     name, fieldType, defaultFieldType, ignoreAbove,
                     context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
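Spelled out, build() now derives the search analyzer from four combinations of normalizer and `split_queries_on_whitespace`. A condensed sketch of just that decision, using a hypothetical helper over plain Lucene types (the real code works on NamedAnalyzer and the registered whitespace variants, as above):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;

final class SearchAnalyzerChoice {
    /**
     * @param normalizer           the configured normalizer, or null when the field has none
     * @param whitespaceNormalizer the whitespace variant registered under the same name, or null
     * @param split                the value of split_queries_on_whitespace
     */
    static Analyzer choose(Analyzer normalizer, Analyzer whitespaceNormalizer, boolean split) {
        if (normalizer != null) {
            // normalizer configured: splitting swaps in its whitespace-tokenized variant
            return split ? whitespaceNormalizer : normalizer;
        }
        // no normalizer: either split plainly on whitespace or keep the input as one term
        return split ? new WhitespaceAnalyzer() : new KeywordAnalyzer();
    }

    public static void main(String[] args) {
        System.out.println(choose(null, null, false)); // KeywordAnalyzer: whole input, one term
        System.out.println(choose(null, null, true));  // WhitespaceAnalyzer: split on whitespace
    }
}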
@@ -147,13 +173,12 @@ public final class KeywordFieldMapper extends FieldMapper {
                 iterator.remove();
             } else if (propName.equals("normalizer")) {
                 if (propNode != null) {
-                    NamedAnalyzer normalizer = parserContext.getIndexAnalyzers().getNormalizer(propNode.toString());
-                    if (normalizer == null) {
-                        throw new MapperParsingException("normalizer [" + propNode.toString() + "] not found for field [" + name + "]");
-                    }
-                    builder.normalizer(normalizer);
+                    builder.normalizer(parserContext.getIndexAnalyzers(), propNode.toString());
                 }
                 iterator.remove();
+            } else if (propName.equals("split_queries_on_whitespace")) {
+                builder.splitQueriesOnWhitespace(XContentMapValues.nodeBooleanValue(propNode, "split_queries_on_whitespace"));
+                iterator.remove();
             }
         }
         return builder;
@@ -163,6 +188,7 @@ public final class KeywordFieldMapper extends FieldMapper {
     public static final class KeywordFieldType extends StringFieldType {
 
         private NamedAnalyzer normalizer = null;
+        private boolean splitQueriesOnWhitespace;
 
         public KeywordFieldType() {
             setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
@@ -172,6 +198,7 @@ public final class KeywordFieldMapper extends FieldMapper {
         protected KeywordFieldType(KeywordFieldType ref) {
             super(ref);
             this.normalizer = ref.normalizer;
+            this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace;
         }
 
         public KeywordFieldType clone() {
@@ -183,7 +210,9 @@ public final class KeywordFieldMapper extends FieldMapper {
             if (super.equals(o) == false) {
                 return false;
             }
-            return Objects.equals(normalizer, ((KeywordFieldType) o).normalizer);
+            KeywordFieldType other = (KeywordFieldType) o;
+            return Objects.equals(normalizer, other.normalizer) &&
+                splitQueriesOnWhitespace == other.splitQueriesOnWhitespace;
         }
 
         @Override
@@ -197,7 +226,7 @@ public final class KeywordFieldMapper extends FieldMapper {
 
         @Override
         public int hashCode() {
-            return 31 * super.hashCode() + Objects.hashCode(normalizer);
+            return 31 * super.hashCode() + Objects.hash(normalizer, splitQueriesOnWhitespace);
         }
 
         @Override
@@ -214,6 +243,15 @@ public final class KeywordFieldMapper extends FieldMapper {
             this.normalizer = normalizer;
         }
 
+        public boolean splitQueriesOnWhitespace() {
+            return splitQueriesOnWhitespace;
+        }
+
+        public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) {
+            checkIfFrozen();
+            this.splitQueriesOnWhitespace = splitQueriesOnWhitespace;
+        }
+
         @Override
         public Query existsQuery(QueryShardContext context) {
             if (hasDocValues()) {
@@ -263,7 +301,8 @@ public final class KeywordFieldMapper extends FieldMapper {
     private int ignoreAbove;
 
     protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
-                                 int ignoreAbove, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+                                 int ignoreAbove, Settings indexSettings,
+                                 MultiFields multiFields, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
         this.ignoreAbove = ignoreAbove;
@@ -366,5 +405,9 @@ public final class KeywordFieldMapper extends FieldMapper {
         } else if (includeDefaults) {
             builder.nullField("normalizer");
         }
+
+        if (includeDefaults || fieldType().splitQueriesOnWhitespace) {
+            builder.field("split_queries_on_whitespace", fieldType().splitQueriesOnWhitespace);
+        }
     }
 }
@@ -52,6 +52,7 @@ import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.support.QueryParsers;
@@ -262,7 +263,8 @@ public class MatchQuery {
          * passing through QueryBuilder.
          */
         boolean noForcedAnalyzer = this.analyzer == null;
-        if (fieldType.tokenized() == false && noForcedAnalyzer) {
+        if (fieldType.tokenized() == false && noForcedAnalyzer &&
+                fieldType instanceof KeywordFieldMapper.KeywordFieldType == false) {
             return blendTermQuery(new Term(fieldName, value.toString()), fieldType);
         }
 
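Excluding KeywordFieldType from this shortcut is what lets the new option take effect: previously any untokenized field bypassed analysis entirely, so the raw query input became a single term; keyword fields now run the input through their search analyzer, which is the whitespace variant (or plain whitespace analyzer) when `split_queries_on_whitespace` is set.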
@@ -54,6 +54,12 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
         assertEquals("my_normalizer", normalizer.name());
         assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet été-là"});
         assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là"));
+
+        normalizer = analysis.indexAnalyzers.getWhitespaceNormalizer("my_normalizer");
+        assertNotNull(normalizer);
+        assertEquals("my_normalizer", normalizer.name());
+        assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet", "été-là"});
+        assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là"));
     }
 
     public void testUnknownType() {
@@ -88,7 +94,13 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
         NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
         assertNotNull(normalizer);
         assertEquals("my_normalizer", normalizer.name());
-        assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] {"zbc"});
+        assertTokenStreamContents(normalizer.tokenStream("foo", "abc acd"), new String[] {"zbc zcd"});
+        assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
+
+        normalizer = analysis.indexAnalyzers.getWhitespaceNormalizer("my_normalizer");
+        assertNotNull(normalizer);
+        assertEquals("my_normalizer", normalizer.name());
+        assertTokenStreamContents(normalizer.tokenStream("foo", "abc acd"), new String[] {"zbc", "zcd"});
         assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
     }
 
@@ -51,9 +51,11 @@ import java.util.List;
 import java.util.Map;
 
 import static java.util.Collections.singletonList;
+import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
 import static java.util.Collections.singletonMap;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 
 public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
     /**
@@ -411,4 +413,59 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
         );
         assertThat(e.getMessage(), containsString("name cannot be empty string"));
     }
+
+    public void testSplitQueriesOnWhitespace() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "keyword")
+                    .endObject()
+                    .startObject("field_with_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
+
+        MappedFieldType fieldType = indexService.mapperService().fullName("field");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        KeywordFieldMapper.KeywordFieldType ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"Hello World"});
+
+        fieldType = indexService.mapperService().fullName("field_with_normalizer");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertThat(ft.searchAnalyzer().name(), equalTo("my_lowercase"));
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"hello", "world"});
+
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "keyword")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                    .startObject("field_with_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                        .field("split_queries_on_whitespace", false)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
+
+        fieldType = indexService.mapperService().fullName("field");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"Hello", "World"});
+
+        fieldType = indexService.mapperService().fullName("field_with_normalizer");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertThat(ft.searchAnalyzer().name(), equalTo("my_lowercase"));
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"hello world"});
+    }
 }
@@ -22,6 +22,7 @@ package org.elasticsearch.index.search;
 import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.BlendedTermQuery;
+import org.apache.lucene.queryparser.xml.builders.DisjunctionMaxQueryBuilder;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
@@ -33,9 +34,12 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContent;
+import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.MapperService;
@@ -43,12 +47,16 @@ import org.elasticsearch.index.mapper.MockFieldMapper.FakeFieldType;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.search.MultiMatchQuery.FieldAndFieldType;
+import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.test.MockKeywordPlugin;
 import org.junit.Before;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -60,6 +68,11 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
 
     private IndexService indexService;
 
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        return Collections.singleton(MockKeywordPlugin.class);
+    }
+
     @Before
     public void setup() throws IOException {
         Settings settings = Settings.builder()
@@ -276,4 +289,58 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
             .build();
         assertEquals(expected, query);
     }
+
+    public void testKeywordSplitQueriesOnWhitespace() throws IOException {
+        IndexService indexService = createIndex("test_keyword", Settings.builder()
+            .put("index.analysis.normalizer.my_lowercase.type", "custom")
+            .putList("index.analysis.normalizer.my_lowercase.filter", "lowercase").build());
+        MapperService mapperService = indexService.mapperService();
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "keyword")
+                    .endObject()
+                    .startObject("field_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                    .endObject()
+                    .startObject("field_split")
+                        .field("type", "keyword")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                    .startObject("field_split_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+        QueryShardContext queryShardContext = indexService.newQueryShardContext(
+            randomInt(20), null, () -> {
+                throw new UnsupportedOperationException();
+            }, null);
+        MultiMatchQuery parser = new MultiMatchQuery(queryShardContext);
+        Map<String, Float> fieldNames = new HashMap<>();
+        fieldNames.put("field", 1.0f);
+        fieldNames.put("field_split", 1.0f);
+        fieldNames.put("field_normalizer", 1.0f);
+        fieldNames.put("field_split_normalizer", 1.0f);
+        Query query = parser.parse(MultiMatchQueryBuilder.Type.BEST_FIELDS, fieldNames, "Foo Bar", null);
+        DisjunctionMaxQuery expected = new DisjunctionMaxQuery(
+            Arrays.asList(
+                new TermQuery(new Term("field_normalizer", "foo bar")),
+                new TermQuery(new Term("field", "Foo Bar")),
+                new BooleanQuery.Builder()
+                    .add(new TermQuery(new Term("field_split", "Foo")), BooleanClause.Occur.SHOULD)
+                    .add(new TermQuery(new Term("field_split", "Bar")), BooleanClause.Occur.SHOULD)
+                    .build(),
+                new BooleanQuery.Builder()
+                    .add(new TermQuery(new Term("field_split_normalizer", "foo")), BooleanClause.Occur.SHOULD)
+                    .add(new TermQuery(new Term("field_split_normalizer", "bar")), BooleanClause.Occur.SHOULD)
+                    .build()
+            ), 0.0f);
+        assertThat(query, equalTo(expected));
+    }
 }
@@ -61,7 +61,7 @@ public class TranslogHandler implements EngineConfig.TranslogRecoveryRunner {
     public TranslogHandler(NamedXContentRegistry xContentRegistry, IndexSettings indexSettings) {
         NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
         IndexAnalyzers indexAnalyzers =
-            new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, emptyMap(), emptyMap());
+            new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, emptyMap(), emptyMap(), emptyMap());
         SimilarityService similarityService = new SimilarityService(indexSettings, null, emptyMap());
         MapperRegistry mapperRegistry = new IndicesModule(emptyList()).getMapperRegistry();
         mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,