From 0791f93dbd1c2e3626a0b7d2a513d1c444f9d493 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 1 Jun 2018 09:47:03 +0200 Subject: [PATCH] Add an option to split keyword field on whitespace at query time (#30691) This change adds an option named `split_queries_on_whitespace` to the `keyword` field type. When set to true full text queries (`match`, `multi_match`, `query_string`, ...) that target the field will split the input on whitespace to build the query terms. Defaults to `false`. Closes #30393 --- docs/reference/mapping/types/keyword.asciidoc | 6 ++ .../metadata/MetaDataIndexUpgradeService.java | 2 +- .../index/analysis/AnalysisRegistry.java | 15 +++-- .../analysis/CustomNormalizerProvider.java | 11 ++- .../index/analysis/IndexAnalyzers.java | 11 ++- .../index/mapper/KeywordFieldMapper.java | 65 +++++++++++++++--- .../index/search/MatchQuery.java | 4 +- .../index/analysis/CustomNormalizerTests.java | 14 +++- .../index/mapper/KeywordFieldMapperTests.java | 57 ++++++++++++++++ .../index/search/MultiMatchQueryTests.java | 67 +++++++++++++++++++ .../index/engine/TranslogHandler.java | 2 +- 11 files changed, 227 insertions(+), 27 deletions(-) diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index c7b35d7315e..09d540feed1 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -103,6 +103,12 @@ The following parameters are accepted by `keyword` fields: How to pre-process the keyword prior to indexing. Defaults to `null`, meaning the keyword is kept as-is. +`split_queries_on_whitespace`:: + + Whether <<full-text-queries,full text queries>> should split the input on whitespace + when building a query for this field. + Accepts `true` or `false` (default). + NOTE: Indexes imported from 2.x do not support `keyword`. Instead they will attempt to downgrade `keyword` into `string`. This allows you to merge modern mappings with legacy mappings. 
Long lived indexes will have to be recreated diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 6d18f5e01b5..3ab20f62237 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -186,7 +186,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent { return Collections.emptySet(); } }; - try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap)) { + try (IndexAnalyzers fakeIndexAnalzyers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap, analyzerMap, analyzerMap)) { MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalzyers, xContentRegistry, similarityService, mapperRegistry, () -> null); mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 77be68fbbe2..e421a19b2ac 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; @@ -453,13 +454,16 @@ public final class AnalysisRegistry implements Closeable { analyzerProviders = new HashMap<>(analyzerProviders); Map analyzers = new HashMap<>(); Map normalizers = new HashMap<>(); + Map 
whitespaceNormalizers = new HashMap<>(); for (Map.Entry> entry : analyzerProviders.entrySet()) { processAnalyzerFactory(indexSettings, entry.getKey(), entry.getValue(), analyzers, tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories); } for (Map.Entry> entry : normalizerProviders.entrySet()) { processNormalizerFactory(entry.getKey(), entry.getValue(), normalizers, - tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories); + "keyword", tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories); + processNormalizerFactory(entry.getKey(), entry.getValue(), whitespaceNormalizers, + "whitespace", () -> new WhitespaceTokenizer(), tokenFilterFactoryFactories, charFilterFactoryFactories); } if (!analyzers.containsKey("default")) { @@ -489,7 +493,7 @@ public final class AnalysisRegistry implements Closeable { } } return new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultSearchAnalyzer, defaultSearchQuoteAnalyzer, - unmodifiableMap(analyzers), unmodifiableMap(normalizers)); + unmodifiableMap(analyzers), unmodifiableMap(normalizers), unmodifiableMap(whitespaceNormalizers)); } private void processAnalyzerFactory(IndexSettings indexSettings, @@ -545,15 +549,16 @@ public final class AnalysisRegistry implements Closeable { String name, AnalyzerProvider normalizerFactory, Map normalizers, - TokenizerFactory keywordTokenizerFactory, + String tokenizerName, + TokenizerFactory tokenizerFactory, Map tokenFilters, Map charFilters) { - if (keywordTokenizerFactory == null) { + if (tokenizerFactory == null) { throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module"); } if (normalizerFactory instanceof CustomNormalizerProvider) { - ((CustomNormalizerProvider) normalizerFactory).build(keywordTokenizerFactory, charFilters, tokenFilters); + ((CustomNormalizerProvider) normalizerFactory).build(tokenizerName, 
tokenizerFactory, charFilters, tokenFilters); } Analyzer normalizerF = normalizerFactory.get(); if (normalizerF == null) { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java index a0a7859d50c..13946be3a8d 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java @@ -38,15 +38,14 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide private CustomAnalyzer customAnalyzer; public CustomNormalizerProvider(IndexSettings indexSettings, - String name, Settings settings) { + String name, Settings settings) { super(indexSettings, name, settings); this.analyzerSettings = settings; } - public void build(final TokenizerFactory keywordTokenizerFactory, final Map charFilters, + public void build(final String tokenizerName, final TokenizerFactory tokenizerFactory, final Map charFilters, final Map tokenFilters) { - String tokenizerName = analyzerSettings.get("tokenizer"); - if (tokenizerName != null) { + if (analyzerSettings.get("tokenizer") != null) { throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer"); } @@ -82,8 +81,8 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide } this.customAnalyzer = new CustomAnalyzer( - "keyword", - keywordTokenizerFactory, + tokenizerName, + tokenizerFactory, charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]), tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()]) ); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/IndexAnalyzers.java b/server/src/main/java/org/elasticsearch/index/analysis/IndexAnalyzers.java index 5131c51213f..f205fd05994 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/IndexAnalyzers.java +++ 
b/server/src/main/java/org/elasticsearch/index/analysis/IndexAnalyzers.java @@ -40,11 +40,12 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Clos private final NamedAnalyzer defaultSearchQuoteAnalyzer; private final Map analyzers; private final Map normalizers; + private final Map whitespaceNormalizers; private final IndexSettings indexSettings; public IndexAnalyzers(IndexSettings indexSettings, NamedAnalyzer defaultIndexAnalyzer, NamedAnalyzer defaultSearchAnalyzer, NamedAnalyzer defaultSearchQuoteAnalyzer, Map analyzers, - Map normalizers) { + Map normalizers, Map whitespaceNormalizers) { super(indexSettings); if (defaultIndexAnalyzer.name().equals("default") == false) { throw new IllegalStateException("default analyzer must have the name [default] but was: [" + defaultIndexAnalyzer.name() + "]"); @@ -54,6 +55,7 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Clos this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer; this.analyzers = analyzers; this.normalizers = normalizers; + this.whitespaceNormalizers = whitespaceNormalizers; this.indexSettings = indexSettings; } @@ -71,6 +73,13 @@ public final class IndexAnalyzers extends AbstractIndexComponent implements Clos return normalizers.get(name); } + /** + * Returns a normalizer that splits on whitespace mapped to the given name or null if not present + */ + public NamedAnalyzer getWhitespaceNormalizer(String name) { + return whitespaceNormalizers.get(name); + } + /** * Returns the default index analyzer for this index */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 8c3a0923d1d..de2aee326c7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper; import 
org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; @@ -35,6 +36,8 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData; @@ -73,6 +76,8 @@ public final class KeywordFieldMapper extends FieldMapper { protected String nullValue = Defaults.NULL_VALUE; protected int ignoreAbove = Defaults.IGNORE_ABOVE; + private IndexAnalyzers indexAnalyzers; + private String normalizerName; public Builder(String name) { super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); @@ -106,15 +111,36 @@ public final class KeywordFieldMapper extends FieldMapper { return builder; } - public Builder normalizer(NamedAnalyzer normalizer) { - fieldType().setNormalizer(normalizer); - fieldType().setSearchAnalyzer(normalizer); + public Builder splitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { + fieldType().setSplitQueriesOnWhitespace(splitQueriesOnWhitespace); + return builder; + } + + public Builder normalizer(IndexAnalyzers indexAnalyzers, String name) { + this.indexAnalyzers = indexAnalyzers; + this.normalizerName = name; return builder; } @Override public KeywordFieldMapper build(BuilderContext context) { setupFieldType(context); + if (normalizerName != null) { + NamedAnalyzer normalizer = indexAnalyzers.getNormalizer(normalizerName); + if (normalizer == null) { + throw new 
MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]"); + } + fieldType().setNormalizer(normalizer); + final NamedAnalyzer searchAnalyzer; + if (fieldType().splitQueriesOnWhitespace) { + searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName); + } else { + searchAnalyzer = normalizer; + } + fieldType().setSearchAnalyzer(searchAnalyzer); + } else if (fieldType().splitQueriesOnWhitespace) { + fieldType().setSearchAnalyzer(new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer())); + } return new KeywordFieldMapper( name, fieldType, defaultFieldType, ignoreAbove, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); @@ -147,13 +173,12 @@ public final class KeywordFieldMapper extends FieldMapper { iterator.remove(); } else if (propName.equals("normalizer")) { if (propNode != null) { - NamedAnalyzer normalizer = parserContext.getIndexAnalyzers().getNormalizer(propNode.toString()); - if (normalizer == null) { - throw new MapperParsingException("normalizer [" + propNode.toString() + "] not found for field [" + name + "]"); - } - builder.normalizer(normalizer); + builder.normalizer(parserContext.getIndexAnalyzers(), propNode.toString()); } iterator.remove(); + } else if (propName.equals("split_queries_on_whitespace")) { + builder.splitQueriesOnWhitespace(XContentMapValues.nodeBooleanValue(propNode, "split_queries_on_whitespace")); + iterator.remove(); } } return builder; @@ -163,6 +188,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final class KeywordFieldType extends StringFieldType { private NamedAnalyzer normalizer = null; + private boolean splitQueriesOnWhitespace; public KeywordFieldType() { setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); @@ -172,6 +198,7 @@ public final class KeywordFieldMapper extends FieldMapper { protected KeywordFieldType(KeywordFieldType ref) { super(ref); this.normalizer = ref.normalizer; + this.splitQueriesOnWhitespace 
= ref.splitQueriesOnWhitespace; } public KeywordFieldType clone() { @@ -183,7 +210,9 @@ public final class KeywordFieldMapper extends FieldMapper { if (super.equals(o) == false) { return false; } - return Objects.equals(normalizer, ((KeywordFieldType) o).normalizer); + KeywordFieldType other = (KeywordFieldType) o; + return Objects.equals(normalizer, other.normalizer) && + splitQueriesOnWhitespace == other.splitQueriesOnWhitespace; } @Override @@ -197,7 +226,7 @@ public final class KeywordFieldMapper extends FieldMapper { @Override public int hashCode() { - return 31 * super.hashCode() + Objects.hashCode(normalizer); + return 31 * super.hashCode() + Objects.hash(normalizer, splitQueriesOnWhitespace); } @Override @@ -214,6 +243,15 @@ public final class KeywordFieldMapper extends FieldMapper { this.normalizer = normalizer; } + public boolean splitQueriesOnWhitespace() { + return splitQueriesOnWhitespace; + } + + public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { + checkIfFrozen(); + this.splitQueriesOnWhitespace = splitQueriesOnWhitespace; + } + @Override public Query existsQuery(QueryShardContext context) { if (hasDocValues()) { @@ -263,7 +301,8 @@ public final class KeywordFieldMapper extends FieldMapper { private int ignoreAbove; protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, - int ignoreAbove, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { + int ignoreAbove, Settings indexSettings, + MultiFields multiFields, CopyTo copyTo) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; this.ignoreAbove = ignoreAbove; @@ -366,5 +405,9 @@ public final class KeywordFieldMapper extends FieldMapper { } else if (includeDefaults) { builder.nullField("normalizer"); } + + if (includeDefaults || fieldType().splitQueriesOnWhitespace) { + 
builder.field("split_queries_on_whitespace", fieldType().splitQueriesOnWhitespace); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 4436d560bd2..7765be215aa 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -52,6 +52,7 @@ import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.support.QueryParsers; @@ -262,7 +263,8 @@ public class MatchQuery { * passing through QueryBuilder. */ boolean noForcedAnalyzer = this.analyzer == null; - if (fieldType.tokenized() == false && noForcedAnalyzer) { + if (fieldType.tokenized() == false && noForcedAnalyzer && + fieldType instanceof KeywordFieldMapper.KeywordFieldType == false) { return blendTermQuery(new Term(fieldName, value.toString()), fieldType); } diff --git a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java index e2025145241..0a1f625f203 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java @@ -54,6 +54,12 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase { assertEquals("my_normalizer", normalizer.name()); assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet été-là"}); assertEquals(new BytesRef("cet été-là"), 
normalizer.normalize("foo", "Cet été-là")); + + normalizer = analysis.indexAnalyzers.getWhitespaceNormalizer("my_normalizer"); + assertNotNull(normalizer); + assertEquals("my_normalizer", normalizer.name()); + assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet", "été-là"}); + assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là")); } public void testUnknownType() { @@ -88,7 +94,13 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase { NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer"); assertNotNull(normalizer); assertEquals("my_normalizer", normalizer.name()); - assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] {"zbc"}); + assertTokenStreamContents(normalizer.tokenStream("foo", "abc acd"), new String[] {"zbc zcd"}); + assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc")); + + normalizer = analysis.indexAnalyzers.getWhitespaceNormalizer("my_normalizer"); + assertNotNull(normalizer); + assertEquals("my_normalizer", normalizer.name()); + assertTokenStreamContents(normalizer.tokenStream("foo", "abc acd"), new String[] {"zbc", "zcd"}); assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc")); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 86cf7b4b766..56e587dc995 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -51,9 +51,11 @@ import java.util.List; import java.util.Map; import static java.util.Collections.singletonList; +import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static java.util.Collections.singletonMap; import static org.hamcrest.Matchers.containsString; import static 
org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; public class KeywordFieldMapperTests extends ESSingleNodeTestCase { /** @@ -411,4 +413,59 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase { ); assertThat(e.getMessage(), containsString("name cannot be empty string")); } + + public void testSplitQueriesOnWhitespace() throws IOException { + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject() + .startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "keyword") + .endObject() + .startObject("field_with_normalizer") + .field("type", "keyword") + .field("normalizer", "my_lowercase") + .field("split_queries_on_whitespace", true) + .endObject() + .endObject() + .endObject().endObject()); + indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + + MappedFieldType fieldType = indexService.mapperService().fullName("field"); + assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); + KeywordFieldMapper.KeywordFieldType ft = (KeywordFieldMapper.KeywordFieldType) fieldType; + assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"Hello World"}); + + fieldType = indexService.mapperService().fullName("field_with_normalizer"); + assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); + ft = (KeywordFieldMapper.KeywordFieldType) fieldType; + assertThat(ft.searchAnalyzer().name(), equalTo("my_lowercase")); + assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"hello", "world"}); + + mapping = Strings.toString(XContentFactory.jsonBuilder().startObject() + .startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "keyword") + .field("split_queries_on_whitespace", true) + .endObject() + .startObject("field_with_normalizer") + .field("type", "keyword") + 
.field("normalizer", "my_lowercase") + .field("split_queries_on_whitespace", false) + .endObject() + .endObject() + .endObject().endObject()); + indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + + fieldType = indexService.mapperService().fullName("field"); + assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); + ft = (KeywordFieldMapper.KeywordFieldType) fieldType; + assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"Hello", "World"}); + + fieldType = indexService.mapperService().fullName("field_with_normalizer"); + assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); + ft = (KeywordFieldMapper.KeywordFieldType) fieldType; + assertThat(ft.searchAnalyzer().name(), equalTo("my_lowercase")); + assertTokenStreamContents(ft.searchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] {"hello world"}); + } } diff --git a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index dbc6294920e..19973f4eb98 100644 --- a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.MockSynonymAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queries.BlendedTermQuery; +import org.apache.lucene.queryparser.xml.builders.DisjunctionMaxQueryBuilder; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; @@ -33,9 +34,12 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; +import 
org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContent; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService; @@ -43,12 +47,16 @@ import org.elasticsearch.index.mapper.MockFieldMapper.FakeFieldType; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.search.MultiMatchQuery.FieldAndFieldType; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.MockKeywordPlugin; import org.junit.Before; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -60,6 +68,11 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { private IndexService indexService; + @Override + protected Collection> getPlugins() { + return Collections.singleton(MockKeywordPlugin.class); + } + @Before public void setup() throws IOException { Settings settings = Settings.builder() @@ -276,4 +289,58 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { .build(); assertEquals(expected, query); } + + public void testKeywordSplitQueriesOnWhitespace() throws IOException { + IndexService indexService = createIndex("test_keyword", Settings.builder() + .put("index.analysis.normalizer.my_lowercase.type", "custom") + .putList("index.analysis.normalizer.my_lowercase.filter", "lowercase").build()); + MapperService mapperService = indexService.mapperService(); + String mapping = 
Strings.toString(XContentFactory.jsonBuilder().startObject() + .startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "keyword") + .endObject() + .startObject("field_normalizer") + .field("type", "keyword") + .field("normalizer", "my_lowercase") + .endObject() + .startObject("field_split") + .field("type", "keyword") + .field("split_queries_on_whitespace", true) + .endObject() + .startObject("field_split_normalizer") + .field("type", "keyword") + .field("normalizer", "my_lowercase") + .field("split_queries_on_whitespace", true) + .endObject() + .endObject() + .endObject().endObject()); + mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + MultiMatchQuery parser = new MultiMatchQuery(queryShardContext); + Map fieldNames = new HashMap<>(); + fieldNames.put("field", 1.0f); + fieldNames.put("field_split", 1.0f); + fieldNames.put("field_normalizer", 1.0f); + fieldNames.put("field_split_normalizer", 1.0f); + Query query = parser.parse(MultiMatchQueryBuilder.Type.BEST_FIELDS, fieldNames, "Foo Bar", null); + DisjunctionMaxQuery expected = new DisjunctionMaxQuery( + Arrays.asList( + new TermQuery(new Term("field_normalizer", "foo bar")), + new TermQuery(new Term("field", "Foo Bar")), + new BooleanQuery.Builder() + .add(new TermQuery(new Term("field_split", "Foo")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field_split", "Bar")), BooleanClause.Occur.SHOULD) + .build(), + new BooleanQuery.Builder() + .add(new TermQuery(new Term("field_split_normalizer", "foo")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field_split_normalizer", "bar")), BooleanClause.Occur.SHOULD) + .build() + ), 0.0f); + assertThat(query, equalTo(expected)); + } } diff --git 
a/test/framework/src/main/java/org/elasticsearch/index/engine/TranslogHandler.java b/test/framework/src/main/java/org/elasticsearch/index/engine/TranslogHandler.java index 4fe18fa9738..53fe89ac17e 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/TranslogHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/TranslogHandler.java @@ -61,7 +61,7 @@ public class TranslogHandler implements EngineConfig.TranslogRecoveryRunner { public TranslogHandler(NamedXContentRegistry xContentRegistry, IndexSettings indexSettings) { NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer()); IndexAnalyzers indexAnalyzers = - new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, emptyMap(), emptyMap()); + new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, emptyMap(), emptyMap(), emptyMap()); SimilarityService similarityService = new SimilarityService(indexSettings, null, emptyMap()); MapperRegistry mapperRegistry = new IndicesModule(emptyList()).getMapperRegistry(); mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,