From 69f35aa07f1466419b950900485d709cd94bb52b Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 28 Nov 2016 14:14:01 +0100 Subject: [PATCH] Fix cross_fields type on multi_match query with synonyms (#21638) * Fix cross_fields type on multi_match query with synonyms This change fixes the cross_fields type of the multi_match query when synonyms are involved. Since 2.x the Lucene query parser creates SynonymQuery for words that appear at the same position. For simple term query the CrossFieldsQueryBuilder expands the term to all requested fields and creates a BlendedTermQuery. This change adds the same mechanism for SynonymQuery which otherwise are not expanded to all requested fields. As a side note I wonder if we should not replace the BlendedTermQuery with the SynonymQuery. They have the same purpose and behave similarly. Fixes #21633 * Fallback to SynonymQuery for blended terms on a single field --- .../index/search/MatchQuery.java | 10 ++ .../index/search/MultiMatchQuery.java | 93 ++++++++++++------- .../index/search/MultiMatchQueryTests.java | 45 ++++++++- 3 files changed, 114 insertions(+), 34 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 46eb6b7d399..008ee5ee7e9 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -31,6 +31,7 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.QueryBuilder; import org.elasticsearch.ElasticsearchException; @@ -302,6 +303,11 @@ public class MatchQuery { return blendTermQuery(term, mapper); } + @Override + protected Query newSynonymQuery(Term[] terms) { + return blendTermsQuery(terms, mapper); + } + public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) { final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop); float boost = 1; @@ -358,6 +364,10 @@ public class MatchQuery { } } + protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { + return new SynonymQuery(terms); + } + protected Query blendTermQuery(Term term, MappedFieldType fieldType) { if (fuzziness != null) { if (fieldType != null) { diff --git a/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 9ac7e2e7520..d08d4aaddc1 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -158,6 +158,10 @@ public class MultiMatchQuery extends MatchQuery { return MultiMatchQuery.super.blendTermQuery(term, fieldType); } + public Query blendTerms(Term[] terms, MappedFieldType fieldType) { + return MultiMatchQuery.super.blendTermsQuery(terms, fieldType); + } + public Query termQuery(MappedFieldType fieldType, Object value) { return MultiMatchQuery.this.termQuery(fieldType, value, lenient); } @@ -223,6 +227,18 @@ public class MultiMatchQuery extends MatchQuery { return queries.isEmpty() ? null : queries; } + @Override + public Query blendTerms(Term[] terms, MappedFieldType fieldType) { + if (blendedFields == null || blendedFields.length == 1) { + return super.blendTerms(terms, fieldType); + } + BytesRef[] values = new BytesRef[terms.length]; + for (int i = 0; i < terms.length; i++) { + values[i] = terms[i].bytes(); + } + return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields); + } + @Override public Query blendTerm(Term term, MappedFieldType fieldType) { if (blendedFields == null) { @@ -243,44 +259,51 @@ public class MultiMatchQuery extends MatchQuery { } static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker, + FieldAndFieldType... blendedFields) { + return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields); + } + + static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) { List queries = new ArrayList<>(); - Term[] terms = new Term[blendedFields.length]; - float[] blendedBoost = new float[blendedFields.length]; + Term[] terms = new Term[blendedFields.length * values.length]; + float[] blendedBoost = new float[blendedFields.length * values.length]; int i = 0; for (FieldAndFieldType ft : blendedFields) { - Query query; - try { - query = ft.fieldType.termQuery(value, context); - } catch (IllegalArgumentException e) { - // the query expects a certain class of values such as numbers - // of ip addresses and the value can't be parsed, so ignore this - // field - continue; - } catch (ElasticsearchParseException parseException) { - // date fields throw an ElasticsearchParseException with the - // underlying IAE as the cause, ignore this field if that is - // the case - if (parseException.getCause() instanceof IllegalArgumentException) { + for (BytesRef term : values) { + Query query; + try { + query = ft.fieldType.termQuery(term, context); + } catch (IllegalArgumentException e) { + // the query expects a certain class of values such as numbers + // of ip addresses and the value can't be parsed, so ignore this + // field continue; + } catch (ElasticsearchParseException parseException) { + // date fields throw an ElasticsearchParseException with the + // underlying IAE as the cause, ignore this field if that is + // the case + if (parseException.getCause() instanceof IllegalArgumentException) { + continue; + } + throw parseException; } - throw parseException; - } - float boost = ft.boost; - while (query instanceof BoostQuery) { - BoostQuery bq = (BoostQuery) query; - query = bq.getQuery(); - boost *= bq.getBoost(); - } - if (query.getClass() == TermQuery.class) { - terms[i] = ((TermQuery) query).getTerm(); - blendedBoost[i] = boost; - i++; - } else { - if (boost != 1f) { - query = new BoostQuery(query, boost); + float boost = ft.boost; + while (query instanceof BoostQuery) { + BoostQuery bq = (BoostQuery) query; + query = bq.getQuery(); + boost *= bq.getBoost(); + } + if (query.getClass() == TermQuery.class) { + terms[i] = ((TermQuery) query).getTerm(); + blendedBoost[i] = boost; + i++; + } else { + if (boost != 1f) { + query = new BoostQuery(query, boost); + } + queries.add(query); } - queries.add(query); } } if (i > 0) { @@ -317,6 +340,14 @@ public class MultiMatchQuery extends MatchQuery { return queryBuilder.blendTerm(term, fieldType); } + @Override + protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { + if (queryBuilder == null) { + return super.blendTermsQuery(terms, fieldType); + } + return queryBuilder.blendTerms(terms, fieldType); + } + static final class FieldAndFieldType { final MappedFieldType fieldType; final float boost; diff --git a/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index 7d7b7a4cd6e..992667a5056 100644 --- a/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -28,10 +28,12 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService; @@ -55,7 +57,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { @Before public void setup() throws IOException { - IndexService indexService = createIndex("test"); + Settings settings = Settings.builder() + .put("index.analysis.filter.syns.type","synonym") + .putArray("index.analysis.filter.syns.synonyms","quick,fast") + .put("index.analysis.analyzer.syns.tokenizer","standard") + .put("index.analysis.analyzer.syns.filter","syns").build(); + IndexService indexService = createIndex("test", settings); MapperService mapperService = indexService.mapperService(); String mapping = "{\n" + " \"person\":{\n" + @@ -63,10 +70,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { " \"name\":{\n" + " \"properties\":{\n" + " \"first\": {\n" + - " \"type\":\"text\"\n" + + " \"type\":\"text\",\n" + + " \"analyzer\":\"syns\"\n" + " }," + " \"last\": {\n" + - " \"type\":\"text\"\n" + + " \"type\":\"text\",\n" + + " \"analyzer\":\"syns\"\n" + " }" + " }" + " }\n" + @@ -176,4 +185,34 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class)); assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\"")); } + + public void testMultiMatchCrossFieldsWithSynonyms() throws IOException { + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { throw new UnsupportedOperationException(); }); + + // check that synonym query is used for a single field + Query parsedQuery = + multiMatchQuery("quick").field("name.first") + .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + Term[] terms = new Term[2]; + terms[0] = new Term("name.first", "quick"); + terms[1] = new Term("name.first", "fast"); + Query expectedQuery = new SynonymQuery(terms); + assertThat(parsedQuery, equalTo(expectedQuery)); + + // check that blended term query is used for multiple fields + parsedQuery = + multiMatchQuery("quick").field("name.first").field("name.last") + .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + terms = new Term[4]; + terms[0] = new Term("name.first", "quick"); + terms[1] = new Term("name.first", "fast"); + terms[2] = new Term("name.last", "quick"); + terms[3] = new Term("name.last", "fast"); + float[] boosts = new float[4]; + Arrays.fill(boosts, 1.0f); + expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); + assertThat(parsedQuery, equalTo(expectedQuery)); + + } }