Add an option to split keyword field on whitespace at query time (#30691)

This change adds an option named `split_queries_on_whitespace` to the `keyword`
field type. When set to `true`, full text queries (`match`, `multi_match`, `query_string`, ...) that target the field split the input on whitespace to build the query terms. Defaults to `false`.
Closes #30393
Jim Ferenczi authored on 2018-06-01 09:47:03 +02:00; committed by GitHub
parent cea3c28b5b
commit 0791f93dbd
11 changed files with 227 additions and 27 deletions


@@ -103,6 +103,12 @@ The following parameters are accepted by `keyword` fields:
     How to pre-process the keyword prior to indexing. Defaults to `null`,
     meaning the keyword is kept as-is.

+`split_queries_on_whitespace`::
+
+    Whether <<full-text-queries,full text queries>> should split the input on whitespace
+    when building a query for this field.
+    Accepts `true` or `false` (default).
+
 NOTE: Indexes imported from 2.x do not support `keyword`. Instead they will
 attempt to downgrade `keyword` into `string`. This allows you to merge modern
 mappings with legacy mappings. Long lived indexes will have to be recreated
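
For illustration, here is a minimal, self-contained sketch (not part of the commit; class and field names are hypothetical) of what the option changes at query time. With the default setting the whole query input stays a single term, as Lucene's KeywordAnalyzer would leave it; with `split_queries_on_whitespace: true` the input is tokenized the way a WhitespaceAnalyzer splits it:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class SplitQueriesOnWhitespaceDemo {

    // Prints the terms a full text query would be built from.
    static void printQueryTerms(Analyzer analyzer, String input) throws Exception {
        try (TokenStream ts = analyzer.tokenStream("field", input)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println("term: [" + term + "]");
            }
            ts.end();
        }
    }

    public static void main(String[] args) throws Exception {
        // split_queries_on_whitespace: false (the default) -> one term for the whole input
        printQueryTerms(new KeywordAnalyzer(), "Hello World");    // term: [Hello World]
        // split_queries_on_whitespace: true -> one term per whitespace-separated token
        printQueryTerms(new WhitespaceAnalyzer(), "Hello World"); // term: [Hello], term: [World]
    }
}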


@@ -186,7 +186,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
                 return Collections.emptySet();
             }
         };
-        try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap)) {
+        try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap, analyzerMap)) {
             MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalzyers, xContentRegistry, similarityService,
                     mapperRegistry, () -> null);
             mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY);


@@ -19,6 +19,7 @@
 package org.elasticsearch.index.analysis;

 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.Version;
@@ -453,13 +454,16 @@ public final class AnalysisRegistry implements Closeable {
         analyzerProviders = new HashMap<>(analyzerProviders);
         Map<String, NamedAnalyzer> analyzers = new HashMap<>();
         Map<String, NamedAnalyzer> normalizers = new HashMap<>();
+        Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap<>();
         for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
             processAnalyzerFactory(indexSettings, entry.getKey(), entry.getValue(), analyzers,
                     tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories);
         }
         for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
             processNormalizerFactory(entry.getKey(), entry.getValue(), normalizers,
-                    tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories);
+                    "keyword", tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories);
+            processNormalizerFactory(entry.getKey(), entry.getValue(), whitespaceNormalizers,
+                    "whitespace", () -> new WhitespaceTokenizer(), tokenFilterFactoryFactories, charFilterFactoryFactories);
         }

         if (!analyzers.containsKey("default")) {
@@ -489,7 +493,7 @@ public final class AnalysisRegistry implements Closeable {
             }
         }
         return new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultSearchAnalyzer, defaultSearchQuoteAnalyzer,
-                unmodifiableMap(analyzers), unmodifiableMap(normalizers));
+                unmodifiableMap(analyzers), unmodifiableMap(normalizers), unmodifiableMap(whitespaceNormalizers));
     }

     private void processAnalyzerFactory(IndexSettings indexSettings,
@@ -545,15 +549,16 @@ public final class AnalysisRegistry implements Closeable {
                                          String name,
                                          AnalyzerProvider<?> normalizerFactory,
                                          Map<String, NamedAnalyzer> normalizers,
-                                         TokenizerFactory keywordTokenizerFactory,
+                                         String tokenizerName,
+                                         TokenizerFactory tokenizerFactory,
                                          Map<String, TokenFilterFactory> tokenFilters,
                                          Map<String, CharFilterFactory> charFilters) {
-        if (keywordTokenizerFactory == null) {
+        if (tokenizerFactory == null) {
             throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module");
         }

         if (normalizerFactory instanceof CustomNormalizerProvider) {
-            ((CustomNormalizerProvider) normalizerFactory).build(keywordTokenizerFactory, charFilters, tokenFilters);
+            ((CustomNormalizerProvider) normalizerFactory).build(tokenizerName, tokenizerFactory, charFilters, tokenFilters);
         }
         Analyzer normalizerF = normalizerFactory.get();
         if (normalizerF == null) {
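
To support that search-time behavior, the registry above compiles every configured normalizer twice: once with the `keyword` tokenizer for index-time normalization, and once with a `WhitespaceTokenizer` for the whitespace-splitting search path. A condensed sketch of the idea (simplified types; `NormalizerConfig` is a hypothetical stand-in, not the real Elasticsearch signatures):

import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

// Hypothetical stand-in for a configured normalizer chain (char filters + token filters).
interface NormalizerConfig {
    Analyzer build(String tokenizerName, Supplier<Tokenizer> tokenizer);
}

class NormalizerRegistrySketch {
    final Map<String, Analyzer> normalizers = new HashMap<>();
    final Map<String, Analyzer> whitespaceNormalizers = new HashMap<>();

    void register(String name, NormalizerConfig config) {
        // Index-time variant: the keyword tokenizer keeps the whole value as one token.
        normalizers.put(name, config.build("keyword", KeywordTokenizer::new));
        // Search-time variant, used when split_queries_on_whitespace is true:
        // the same filter chain, but the query input is first split on whitespace.
        whitespaceNormalizers.put(name, config.build("whitespace", WhitespaceTokenizer::new));
    }
}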


@@ -38,15 +38,14 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvider

     private CustomAnalyzer customAnalyzer;

     public CustomNormalizerProvider(IndexSettings indexSettings,
                                     String name, Settings settings) {
         super(indexSettings, name, settings);
         this.analyzerSettings = settings;
     }

-    public void build(final TokenizerFactory keywordTokenizerFactory, final Map<String, CharFilterFactory> charFilters,
+    public void build(final String tokenizerName, final TokenizerFactory tokenizerFactory, final Map<String, CharFilterFactory> charFilters,
                       final Map<String, TokenFilterFactory> tokenFilters) {
-        String tokenizerName = analyzerSettings.get("tokenizer");
-        if (tokenizerName != null) {
+        if (analyzerSettings.get("tokenizer") != null) {
             throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer");
         }
@@ -82,8 +81,8 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvider
         }

         this.customAnalyzer = new CustomAnalyzer(
-                "keyword",
-                keywordTokenizerFactory,
+                tokenizerName,
+                tokenizerFactory,
                 charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
                 tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])
         );


@@ -40,11 +40,12 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable
     private final NamedAnalyzer defaultSearchQuoteAnalyzer;
     private final Map<String, NamedAnalyzer> analyzers;
     private final Map<String, NamedAnalyzer> normalizers;
+    private final Map<String, NamedAnalyzer> whitespaceNormalizers;
     private final IndexSettings indexSettings;

     public IndexAnalyzers(IndexSettings indexSettings, NamedAnalyzer defaultIndexAnalyzer, NamedAnalyzer defaultSearchAnalyzer,
                           NamedAnalyzer defaultSearchQuoteAnalyzer, Map<String, NamedAnalyzer> analyzers,
-                          Map<String, NamedAnalyzer> normalizers) {
+                          Map<String, NamedAnalyzer> normalizers, Map<String, NamedAnalyzer> whitespaceNormalizers) {
         super(indexSettings);
         if (defaultIndexAnalyzer.name().equals("default") == false) {
             throw new IllegalStateException("default analyzer must have the name [default] but was: [" + defaultIndexAnalyzer.name() + "]");
@@ -54,6 +55,7 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable
         this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer;
         this.analyzers = analyzers;
         this.normalizers = normalizers;
+        this.whitespaceNormalizers = whitespaceNormalizers;
         this.indexSettings = indexSettings;
     }

@@ -71,6 +73,13 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Closeable
         return normalizers.get(name);
     }

+    /**
+     * Returns a normalizer that splits on whitespace mapped to the given name, or <code>null</code> if not present.
+     */
+    public NamedAnalyzer getWhitespaceNormalizer(String name) {
+        return whitespaceNormalizers.get(name);
+    }
+
     /**
      * Returns the default index analyzer for this index
      */


@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper;

 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedSetDocValuesField;
@@ -35,6 +36,8 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.analysis.AnalyzerScope;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@@ -73,6 +76,8 @@ public final class KeywordFieldMapper extends FieldMapper {
         protected String nullValue = Defaults.NULL_VALUE;
         protected int ignoreAbove = Defaults.IGNORE_ABOVE;
+        private IndexAnalyzers indexAnalyzers;
+        private String normalizerName;

         public Builder(String name) {
             super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
@@ -106,15 +111,36 @@ public final class KeywordFieldMapper extends FieldMapper {
             return builder;
         }

-        public Builder normalizer(NamedAnalyzer normalizer) {
-            fieldType().setNormalizer(normalizer);
-            fieldType().setSearchAnalyzer(normalizer);
+        public Builder splitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) {
+            fieldType().setSplitQueriesOnWhitespace(splitQueriesOnWhitespace);
+            return builder;
+        }
+
+        public Builder normalizer(IndexAnalyzers indexAnalyzers, String name) {
+            this.indexAnalyzers = indexAnalyzers;
+            this.normalizerName = name;
             return builder;
         }

         @Override
         public KeywordFieldMapper build(BuilderContext context) {
             setupFieldType(context);
+            if (normalizerName != null) {
+                NamedAnalyzer normalizer = indexAnalyzers.getNormalizer(normalizerName);
+                if (normalizer == null) {
+                    throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]");
+                }
+                fieldType().setNormalizer(normalizer);
+                final NamedAnalyzer searchAnalyzer;
+                if (fieldType().splitQueriesOnWhitespace) {
+                    searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName);
+                } else {
+                    searchAnalyzer = normalizer;
+                }
+                fieldType().setSearchAnalyzer(searchAnalyzer);
+            } else if (fieldType().splitQueriesOnWhitespace) {
+                fieldType().setSearchAnalyzer(new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()));
+            }
             return new KeywordFieldMapper(
                     name, fieldType, defaultFieldType, ignoreAbove,
                     context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
@@ -147,13 +173,12 @@ public final class KeywordFieldMapper extends FieldMapper {
                 iterator.remove();
             } else if (propName.equals("normalizer")) {
                 if (propNode != null) {
-                    NamedAnalyzer normalizer = parserContext.getIndexAnalyzers().getNormalizer(propNode.toString());
-                    if (normalizer == null) {
-                        throw new MapperParsingException("normalizer [" + propNode.toString() + "] not found for field [" + name + "]");
-                    }
-                    builder.normalizer(normalizer);
+                    builder.normalizer(parserContext.getIndexAnalyzers(), propNode.toString());
                 }
                 iterator.remove();
+            } else if (propName.equals("split_queries_on_whitespace")) {
+                builder.splitQueriesOnWhitespace(XContentMapValues.nodeBooleanValue(propNode, "split_queries_on_whitespace"));
+                iterator.remove();
             }
         }
         return builder;
@@ -163,6 +188,7 @@ public final class KeywordFieldMapper extends FieldMapper {
     public static final class KeywordFieldType extends StringFieldType {

         private NamedAnalyzer normalizer = null;
+        private boolean splitQueriesOnWhitespace;

         public KeywordFieldType() {
             setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
@@ -172,6 +198,7 @@ public final class KeywordFieldMapper extends FieldMapper {
         protected KeywordFieldType(KeywordFieldType ref) {
             super(ref);
             this.normalizer = ref.normalizer;
+            this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace;
         }

         public KeywordFieldType clone() {
@@ -183,7 +210,9 @@ public final class KeywordFieldMapper extends FieldMapper {
             if (super.equals(o) == false) {
                 return false;
             }
-            return Objects.equals(normalizer, ((KeywordFieldType) o).normalizer);
+            KeywordFieldType other = (KeywordFieldType) o;
+            return Objects.equals(normalizer, other.normalizer) &&
+                splitQueriesOnWhitespace == other.splitQueriesOnWhitespace;
         }

         @Override
@@ -197,7 +226,7 @@ public final class KeywordFieldMapper extends FieldMapper {
         @Override
         public int hashCode() {
-            return 31 * super.hashCode() + Objects.hashCode(normalizer);
+            return 31 * super.hashCode() + Objects.hash(normalizer, splitQueriesOnWhitespace);
         }

         @Override
@@ -214,6 +243,15 @@ public final class KeywordFieldMapper extends FieldMapper {
             this.normalizer = normalizer;
         }

+        public boolean splitQueriesOnWhitespace() {
+            return splitQueriesOnWhitespace;
+        }
+
+        public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) {
+            checkIfFrozen();
+            this.splitQueriesOnWhitespace = splitQueriesOnWhitespace;
+        }
+
         @Override
         public Query existsQuery(QueryShardContext context) {
             if (hasDocValues()) {
@@ -263,7 +301,8 @@ public final class KeywordFieldMapper extends FieldMapper {
     private int ignoreAbove;

     protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
-                                 int ignoreAbove, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+                                 int ignoreAbove, Settings indexSettings,
+                                 MultiFields multiFields, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
         this.ignoreAbove = ignoreAbove;
@@ -366,5 +405,9 @@ public final class KeywordFieldMapper extends FieldMapper {
         } else if (includeDefaults) {
             builder.nullField("normalizer");
         }
+
+        if (includeDefaults || fieldType().splitQueriesOnWhitespace) {
+            builder.field("split_queries_on_whitespace", fieldType().splitQueriesOnWhitespace);
+        }
     }
 }
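
Putting the Builder logic above in one place: the search analyzer for a keyword field is now chosen from four cases. A simplified sketch of that decision (names and signatures condensed; this is not the mapper's actual API):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;

class SearchAnalyzerChoiceSketch {

    // normalizer / whitespaceVariant: the two precompiled forms of the same
    // normalizer chain; keywordDefault: the analyzer that keeps input as one term.
    static Analyzer pick(Analyzer normalizer, Analyzer whitespaceVariant,
                         boolean splitQueriesOnWhitespace, Analyzer keywordDefault) {
        if (normalizer != null) {
            // With a normalizer, splitting swaps in the whitespace variant so the
            // same char/token filters still run on each whitespace-separated token.
            return splitQueriesOnWhitespace ? whitespaceVariant : normalizer;
        }
        // Without a normalizer, splitting falls back to a plain WhitespaceAnalyzer.
        return splitQueriesOnWhitespace ? new WhitespaceAnalyzer() : keywordDefault;
    }
}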


@@ -52,6 +52,7 @@ import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.support.QueryParsers;
@@ -262,7 +263,8 @@ public class MatchQuery {
          * passing through QueryBuilder.
          */
        boolean noForcedAnalyzer = this.analyzer == null;
-        if (fieldType.tokenized() == false && noForcedAnalyzer) {
+        if (fieldType.tokenized() == false && noForcedAnalyzer &&
+                fieldType instanceof KeywordFieldMapper.KeywordFieldType == false) {
             return blendTermQuery(new Term(fieldName, value.toString()), fieldType);
         }
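
The visible effect of exempting keyword fields from that raw-term shortcut, sketched with hypothetical field names: a match query for "Foo Bar" on a field with `split_queries_on_whitespace: true` becomes a boolean query over per-token terms, while the default keeps a single raw term (the MultiMatchQueryTests below assert exactly these shapes):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class ExpectedQueryShapes {

    // "field_split" declared with split_queries_on_whitespace: true
    static Query splitField() {
        return new BooleanQuery.Builder()
                .add(new TermQuery(new Term("field_split", "Foo")), BooleanClause.Occur.SHOULD)
                .add(new TermQuery(new Term("field_split", "Bar")), BooleanClause.Occur.SHOULD)
                .build();
    }

    // "field" with the default setting: the input stays one term.
    static Query defaultField() {
        return new TermQuery(new Term("field", "Foo Bar"));
    }
}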


@@ -54,6 +54,12 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
         assertEquals("my_normalizer", normalizer.name());
         assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet été-là"});
         assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là"));
+
+        normalizer = analysis.indexAnalyzers.getWhitespaceNormalizer("my_normalizer");
+        assertNotNull(normalizer);
+        assertEquals("my_normalizer", normalizer.name());
+        assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet", "été-là"});
+        assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là"));
     }

     public void testUnknownType() {
@@ -88,7 +94,13 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
         NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
         assertNotNull(normalizer);
         assertEquals("my_normalizer", normalizer.name());
-        assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] {"zbc"});
+        assertTokenStreamContents(normalizer.tokenStream("foo", "abc acd"), new String[] {"zbc zcd"});
         assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
+
+        normalizer = analysis.indexAnalyzers.getWhitespaceNormalizer("my_normalizer");
+        assertNotNull(normalizer);
+        assertEquals("my_normalizer", normalizer.name());
+        assertTokenStreamContents(normalizer.tokenStream("foo", "abc acd"), new String[] {"zbc", "zcd"});
+        assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
     }


@@ -51,9 +51,11 @@ import java.util.List;
 import java.util.Map;

 import static java.util.Collections.singletonList;
+import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
 import static java.util.Collections.singletonMap;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;

 public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
     /**
@@ -411,4 +413,59 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
         );
         assertThat(e.getMessage(), containsString("name cannot be empty string"));
     }
+
+    public void testSplitQueriesOnWhitespace() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "keyword")
+                    .endObject()
+                    .startObject("field_with_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
+
+        MappedFieldType fieldType = indexService.mapperService().fullName("field");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        KeywordFieldMapper.KeywordFieldType ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"Hello World"});
+
+        fieldType = indexService.mapperService().fullName("field_with_normalizer");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertThat(ft.searchAnalyzer().name(), equalTo("my_lowercase"));
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"hello", "world"});
+
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "keyword")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                    .startObject("field_with_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                        .field("split_queries_on_whitespace", false)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
+
+        fieldType = indexService.mapperService().fullName("field");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"Hello", "World"});
+
+        fieldType = indexService.mapperService().fullName("field_with_normalizer");
+        assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class));
+        ft = (KeywordFieldMapper.KeywordFieldType) fieldType;
+        assertThat(ft.searchAnalyzer().name(), equalTo("my_lowercase"));
+        assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"hello world"});
+    }
 }


@@ -22,6 +22,7 @@ package org.elasticsearch.index.search;
 import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.BlendedTermQuery;
+import org.apache.lucene.queryparser.xml.builders.DisjunctionMaxQueryBuilder;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
@@ -33,9 +34,12 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContent;
+import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.MapperService;
@@ -43,12 +47,16 @@ import org.elasticsearch.index.mapper.MockFieldMapper.FakeFieldType;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.search.MultiMatchQuery.FieldAndFieldType;
+import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.test.MockKeywordPlugin;
 import org.junit.Before;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -60,6 +68,11 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {

     private IndexService indexService;

+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        return Collections.singleton(MockKeywordPlugin.class);
+    }
+
     @Before
     public void setup() throws IOException {
         Settings settings = Settings.builder()
@@ -276,4 +289,58 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
                 .build();
         assertEquals(expected, query);
     }
+
+    public void testKeywordSplitQueriesOnWhitespace() throws IOException {
+        IndexService indexService = createIndex("test_keyword", Settings.builder()
+                .put("index.analysis.normalizer.my_lowercase.type", "custom")
+                .putList("index.analysis.normalizer.my_lowercase.filter", "lowercase").build());
+        MapperService mapperService = indexService.mapperService();
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "keyword")
+                    .endObject()
+                    .startObject("field_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                    .endObject()
+                    .startObject("field_split")
+                        .field("type", "keyword")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                    .startObject("field_split_normalizer")
+                        .field("type", "keyword")
+                        .field("normalizer", "my_lowercase")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+        QueryShardContext queryShardContext = indexService.newQueryShardContext(
+                randomInt(20), null, () -> {
+                    throw new UnsupportedOperationException();
+                }, null);
+        MultiMatchQuery parser = new MultiMatchQuery(queryShardContext);
+        Map<String, Float> fieldNames = new HashMap<>();
+        fieldNames.put("field", 1.0f);
+        fieldNames.put("field_split", 1.0f);
+        fieldNames.put("field_normalizer", 1.0f);
+        fieldNames.put("field_split_normalizer", 1.0f);
+        Query query = parser.parse(MultiMatchQueryBuilder.Type.BEST_FIELDS, fieldNames, "Foo Bar", null);
+        DisjunctionMaxQuery expected = new DisjunctionMaxQuery(
+            Arrays.asList(
+                new TermQuery(new Term("field_normalizer", "foo bar")),
+                new TermQuery(new Term("field", "Foo Bar")),
+                new BooleanQuery.Builder()
+                    .add(new TermQuery(new Term("field_split", "Foo")), BooleanClause.Occur.SHOULD)
+                    .add(new TermQuery(new Term("field_split", "Bar")), BooleanClause.Occur.SHOULD)
+                    .build(),
+                new BooleanQuery.Builder()
+                    .add(new TermQuery(new Term("field_split_normalizer", "foo")), BooleanClause.Occur.SHOULD)
+                    .add(new TermQuery(new Term("field_split_normalizer", "bar")), BooleanClause.Occur.SHOULD)
+                    .build()
+            ), 0.0f);
+        assertThat(query, equalTo(expected));
+    }
 }


@@ -61,7 +61,7 @@ public class TranslogHandler implements EngineConfig.TranslogRecoveryRunner {
     public TranslogHandler(NamedXContentRegistry xContentRegistry, IndexSettings indexSettings) {
         NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
         IndexAnalyzers indexAnalyzers =
-                new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, emptyMap(), emptyMap());
+                new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, emptyMap(), emptyMap(), emptyMap());
         SimilarityService similarityService = new SimilarityService(indexSettings, null, emptyMap());
         MapperRegistry mapperRegistry = new IndicesModule(emptyList()).getMapperRegistry();
         mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,