Fix cross_fields type on multi_match query with synonyms (#21638)
* Fix cross_fields type on multi_match query with synonyms. This change fixes the cross_fields type of the multi_match query when synonyms are involved. Since 2.x, the Lucene query parser creates a SynonymQuery for words that appear at the same position. For a simple term query, the CrossFieldsQueryBuilder expands the term to all requested fields and creates a BlendedTermQuery. This change adds the same mechanism for SynonymQuery, whose terms are otherwise not expanded to all requested fields. As a side note, I wonder whether we should replace the BlendedTermQuery with the SynonymQuery: they have the same purpose and behave similarly. Fixes #21633. * Fall back to SynonymQuery for blended terms on a single field.
This commit is contained in:
parent
8390648709
commit
69f35aa07f
|
@ -31,6 +31,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
|
||||||
import org.apache.lucene.search.MultiTermQuery;
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.SynonymQuery;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.util.QueryBuilder;
|
import org.apache.lucene.util.QueryBuilder;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
|
@ -302,6 +303,11 @@ public class MatchQuery {
|
||||||
return blendTermQuery(term, mapper);
|
return blendTermQuery(term, mapper);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hook invoked for a group of synonym terms (per the commit message, words the
 * analyzer emitted at the same position). Instead of building the query here,
 * it hands the terms and the in-scope {@code mapper} to
 * {@code blendTermsQuery}, so that the enclosing query class decides how the
 * synonyms are combined.
 *
 * @param terms the synonym terms to combine
 * @return whatever {@code blendTermsQuery(terms, mapper)} produces
 */
@Override
|
||||||
|
protected Query newSynonymQuery(Term[] terms) {
|
||||||
|
return blendTermsQuery(terms, mapper);
|
||||||
|
}
|
||||||
|
|
||||||
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
|
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
|
||||||
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
|
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
|
||||||
float boost = 1;
|
float boost = 1;
|
||||||
|
@ -358,6 +364,10 @@ public class MatchQuery {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Default combination of a group of synonym terms: wraps them in a single
 * {@link SynonymQuery}. It is {@code protected} so that subclasses can
 * override it; the MultiMatchQuery override shown later in this diff does so.
 *
 * @param terms     the synonym terms to combine
 * @param fieldType not used by this default implementation
 * @return a {@link SynonymQuery} over {@code terms}
 */
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
||||||
|
return new SynonymQuery(terms);
|
||||||
|
}
|
||||||
|
|
||||||
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
|
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
|
||||||
if (fuzziness != null) {
|
if (fuzziness != null) {
|
||||||
if (fieldType != null) {
|
if (fieldType != null) {
|
||||||
|
|
|
@ -158,6 +158,10 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
return MultiMatchQuery.super.blendTermQuery(term, fieldType);
|
return MultiMatchQuery.super.blendTermQuery(term, fieldType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Default handling of a group of synonym terms for this query builder:
 * delegates to the {@code blendTermsQuery} implementation of the enclosing
 * MultiMatchQuery's superclass. It mirrors the neighbouring
 * {@code blendTermQuery} delegation used for a single term.
 *
 * @param terms     the synonym terms to combine
 * @param fieldType field type passed through to the superclass implementation
 * @return the query built by {@code MultiMatchQuery.super.blendTermsQuery}
 */
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
|
||||||
|
return MultiMatchQuery.super.blendTermsQuery(terms, fieldType);
|
||||||
|
}
|
||||||
|
|
||||||
public Query termQuery(MappedFieldType fieldType, Object value) {
|
public Query termQuery(MappedFieldType fieldType, Object value) {
|
||||||
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
|
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
|
||||||
}
|
}
|
||||||
|
@ -223,6 +227,18 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
return queries.isEmpty() ? null : queries;
|
return queries.isEmpty() ? null : queries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Combines a group of synonym terms across the blended fields.
 * <p>
 * When there are no blended fields, or exactly one, it defers to
 * {@code super.blendTerms}. The commit message describes this as the
 * fallback to SynonymQuery for blended terms on a single field. Otherwise
 * only the raw bytes of each term are kept, and the static
 * {@code MultiMatchQuery.blendTerms} builds the query from those values
 * and {@code blendedFields}.
 *
 * @param terms     the synonym terms; only their {@code bytes()} are used on the blended path
 * @param fieldType forwarded to {@code super.blendTerms} on the fallback path, unused otherwise
 * @return the fallback query, or the result of {@code MultiMatchQuery.blendTerms}
 */
@Override
|
||||||
|
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
|
||||||
|
// Nothing to blend across: zero or one field, so use the superclass behaviour.
if (blendedFields == null || blendedFields.length == 1) {
|
||||||
|
return super.blendTerms(terms, fieldType);
|
||||||
|
}
|
||||||
|
// Drop the field part of each Term and keep only its bytes.
BytesRef[] values = new BytesRef[terms.length];
|
||||||
|
for (int i = 0; i < terms.length; i++) {
|
||||||
|
values[i] = terms[i].bytes();
|
||||||
|
}
|
||||||
|
return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Query blendTerm(Term term, MappedFieldType fieldType) {
|
public Query blendTerm(Term term, MappedFieldType fieldType) {
|
||||||
if (blendedFields == null) {
|
if (blendedFields == null) {
|
||||||
|
@ -244,14 +260,20 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
|
|
||||||
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
|
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
|
||||||
FieldAndFieldType... blendedFields) {
|
FieldAndFieldType... blendedFields) {
|
||||||
|
return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker,
|
||||||
|
FieldAndFieldType... blendedFields) {
|
||||||
List<Query> queries = new ArrayList<>();
|
List<Query> queries = new ArrayList<>();
|
||||||
Term[] terms = new Term[blendedFields.length];
|
Term[] terms = new Term[blendedFields.length * values.length];
|
||||||
float[] blendedBoost = new float[blendedFields.length];
|
float[] blendedBoost = new float[blendedFields.length * values.length];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (FieldAndFieldType ft : blendedFields) {
|
for (FieldAndFieldType ft : blendedFields) {
|
||||||
|
for (BytesRef term : values) {
|
||||||
Query query;
|
Query query;
|
||||||
try {
|
try {
|
||||||
query = ft.fieldType.termQuery(value, context);
|
query = ft.fieldType.termQuery(term, context);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
// the query expects a certain class of values such as numbers
|
// the query expects a certain class of values such as numbers
|
||||||
// of ip addresses and the value can't be parsed, so ignore this
|
// of ip addresses and the value can't be parsed, so ignore this
|
||||||
|
@ -283,6 +305,7 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
queries.add(query);
|
queries.add(query);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
terms = Arrays.copyOf(terms, i);
|
terms = Arrays.copyOf(terms, i);
|
||||||
blendedBoost = Arrays.copyOf(blendedBoost, i);
|
blendedBoost = Arrays.copyOf(blendedBoost, i);
|
||||||
|
@ -317,6 +340,14 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
return queryBuilder.blendTerm(term, fieldType);
|
return queryBuilder.blendTerm(term, fieldType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Routes a group of synonym terms to the active query builder.
 * <p>
 * If no {@code queryBuilder} is set, the superclass implementation is used.
 * Otherwise the call is forwarded to {@code queryBuilder.blendTerms}, which
 * follows the same null-check-then-delegate shape as the single-term method
 * just above it in this diff.
 *
 * @param terms     the synonym terms to combine
 * @param fieldType field type passed through to whichever implementation handles the call
 * @return the query built by the superclass or by {@code queryBuilder}
 */
@Override
|
||||||
|
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
||||||
|
if (queryBuilder == null) {
|
||||||
|
return super.blendTermsQuery(terms, fieldType);
|
||||||
|
}
|
||||||
|
return queryBuilder.blendTerms(terms, fieldType);
|
||||||
|
}
|
||||||
|
|
||||||
static final class FieldAndFieldType {
|
static final class FieldAndFieldType {
|
||||||
final MappedFieldType fieldType;
|
final MappedFieldType fieldType;
|
||||||
final float boost;
|
final float boost;
|
||||||
|
|
|
@ -28,10 +28,12 @@ import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.SynonymQuery;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.common.compress.CompressedXContent;
|
import org.elasticsearch.common.compress.CompressedXContent;
|
||||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
import org.elasticsearch.index.engine.Engine;
|
import org.elasticsearch.index.engine.Engine;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
|
@ -55,7 +57,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
IndexService indexService = createIndex("test");
|
Settings settings = Settings.builder()
|
||||||
|
.put("index.analysis.filter.syns.type","synonym")
|
||||||
|
.putArray("index.analysis.filter.syns.synonyms","quick,fast")
|
||||||
|
.put("index.analysis.analyzer.syns.tokenizer","standard")
|
||||||
|
.put("index.analysis.analyzer.syns.filter","syns").build();
|
||||||
|
IndexService indexService = createIndex("test", settings);
|
||||||
MapperService mapperService = indexService.mapperService();
|
MapperService mapperService = indexService.mapperService();
|
||||||
String mapping = "{\n" +
|
String mapping = "{\n" +
|
||||||
" \"person\":{\n" +
|
" \"person\":{\n" +
|
||||||
|
@ -63,10 +70,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
||||||
" \"name\":{\n" +
|
" \"name\":{\n" +
|
||||||
" \"properties\":{\n" +
|
" \"properties\":{\n" +
|
||||||
" \"first\": {\n" +
|
" \"first\": {\n" +
|
||||||
" \"type\":\"text\"\n" +
|
" \"type\":\"text\",\n" +
|
||||||
|
" \"analyzer\":\"syns\"\n" +
|
||||||
" }," +
|
" }," +
|
||||||
" \"last\": {\n" +
|
" \"last\": {\n" +
|
||||||
" \"type\":\"text\"\n" +
|
" \"type\":\"text\",\n" +
|
||||||
|
" \"analyzer\":\"syns\"\n" +
|
||||||
" }" +
|
" }" +
|
||||||
" }" +
|
" }" +
|
||||||
" }\n" +
|
" }\n" +
|
||||||
|
@ -176,4 +185,34 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
||||||
assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
|
assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
|
||||||
assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\""));
|
assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Verifies how a {@code cross_fields} multi_match query on the text "quick"
 * is built when synonyms are involved. The expected terms below include
 * "fast" alongside "quick", matching the "quick,fast" synonym rule that the
 * setup shown in this diff configures for both fields.
 * <ul>
 *   <li>One field: the parsed query must equal a {@link SynonymQuery} over
 *       both synonym terms on that field.</li>
 *   <li>Two fields: the parsed query must equal
 *       {@code BlendedTermQuery.dismaxBlendedQuery} over every field/term
 *       pair, with all boosts set to 1.0 and 1.0f as the last argument.</li>
 * </ul>
 */
public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
|
||||||
|
QueryShardContext queryShardContext = indexService.newQueryShardContext(
|
||||||
|
randomInt(20), null, () -> { throw new UnsupportedOperationException(); });
|
||||||
|
|
||||||
|
// check that synonym query is used for a single field
|
||||||
|
Query parsedQuery =
|
||||||
|
multiMatchQuery("quick").field("name.first")
|
||||||
|
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
|
||||||
|
Term[] terms = new Term[2];
|
||||||
|
terms[0] = new Term("name.first", "quick");
|
||||||
|
terms[1] = new Term("name.first", "fast");
|
||||||
|
Query expectedQuery = new SynonymQuery(terms);
|
||||||
|
assertThat(parsedQuery, equalTo(expectedQuery));
|
||||||
|
|
||||||
|
// check that blended term query is used for multiple fields
|
||||||
|
parsedQuery =
|
||||||
|
multiMatchQuery("quick").field("name.first").field("name.last")
|
||||||
|
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
|
||||||
|
// expected terms are listed field by field, each field carrying both synonyms
terms = new Term[4];
|
||||||
|
terms[0] = new Term("name.first", "quick");
|
||||||
|
terms[1] = new Term("name.first", "fast");
|
||||||
|
terms[2] = new Term("name.last", "quick");
|
||||||
|
terms[3] = new Term("name.last", "fast");
|
||||||
|
float[] boosts = new float[4];
|
||||||
|
Arrays.fill(boosts, 1.0f);
|
||||||
|
expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f);
|
||||||
|
assertThat(parsedQuery, equalTo(expectedQuery));
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue