Fix cross_fields type on multi_match query with synonyms (#21638)
* Fix cross_fields type on multi_match query with synonyms This change fixes the cross_fields type of the multi_match query when synonyms are involved. Since 2.x the Lucene query parser creates SynonymQuery for words that appear at the same position. For simple term query the CrossFieldsQueryBuilder expands the term to all requested fields and creates a BlendedTermQuery. This change adds the same mechanism for SynonymQuery which otherwise are not expanded to all requested fields. As a side note I wonder if we should not replace the BlendedTermQuery with the SynonymQuery. They have the same purpose and behave similarly. Fixes #21633 * Fallback to SynonymQuery for blended terms on a single field
This commit is contained in:
parent
8390648709
commit
69f35aa07f
|
@ -31,6 +31,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
|
|||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.QueryBuilder;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
|
@ -302,6 +303,11 @@ public class MatchQuery {
|
|||
return blendTermQuery(term, mapper);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query newSynonymQuery(Term[] terms) {
|
||||
return blendTermsQuery(terms, mapper);
|
||||
}
|
||||
|
||||
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
|
||||
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
|
||||
float boost = 1;
|
||||
|
@ -358,6 +364,10 @@ public class MatchQuery {
|
|||
}
|
||||
}
|
||||
|
||||
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
||||
return new SynonymQuery(terms);
|
||||
}
|
||||
|
||||
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
|
||||
if (fuzziness != null) {
|
||||
if (fieldType != null) {
|
||||
|
|
|
@ -158,6 +158,10 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
return MultiMatchQuery.super.blendTermQuery(term, fieldType);
|
||||
}
|
||||
|
||||
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
|
||||
return MultiMatchQuery.super.blendTermsQuery(terms, fieldType);
|
||||
}
|
||||
|
||||
public Query termQuery(MappedFieldType fieldType, Object value) {
|
||||
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
|
||||
}
|
||||
|
@ -223,6 +227,18 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
return queries.isEmpty() ? null : queries;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
|
||||
if (blendedFields == null || blendedFields.length == 1) {
|
||||
return super.blendTerms(terms, fieldType);
|
||||
}
|
||||
BytesRef[] values = new BytesRef[terms.length];
|
||||
for (int i = 0; i < terms.length; i++) {
|
||||
values[i] = terms[i].bytes();
|
||||
}
|
||||
return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query blendTerm(Term term, MappedFieldType fieldType) {
|
||||
if (blendedFields == null) {
|
||||
|
@ -243,44 +259,51 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
}
|
||||
|
||||
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
|
||||
FieldAndFieldType... blendedFields) {
|
||||
return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields);
|
||||
}
|
||||
|
||||
static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker,
|
||||
FieldAndFieldType... blendedFields) {
|
||||
List<Query> queries = new ArrayList<>();
|
||||
Term[] terms = new Term[blendedFields.length];
|
||||
float[] blendedBoost = new float[blendedFields.length];
|
||||
Term[] terms = new Term[blendedFields.length * values.length];
|
||||
float[] blendedBoost = new float[blendedFields.length * values.length];
|
||||
int i = 0;
|
||||
for (FieldAndFieldType ft : blendedFields) {
|
||||
Query query;
|
||||
try {
|
||||
query = ft.fieldType.termQuery(value, context);
|
||||
} catch (IllegalArgumentException e) {
|
||||
// the query expects a certain class of values such as numbers
|
||||
// of ip addresses and the value can't be parsed, so ignore this
|
||||
// field
|
||||
continue;
|
||||
} catch (ElasticsearchParseException parseException) {
|
||||
// date fields throw an ElasticsearchParseException with the
|
||||
// underlying IAE as the cause, ignore this field if that is
|
||||
// the case
|
||||
if (parseException.getCause() instanceof IllegalArgumentException) {
|
||||
for (BytesRef term : values) {
|
||||
Query query;
|
||||
try {
|
||||
query = ft.fieldType.termQuery(term, context);
|
||||
} catch (IllegalArgumentException e) {
|
||||
// the query expects a certain class of values such as numbers
|
||||
// of ip addresses and the value can't be parsed, so ignore this
|
||||
// field
|
||||
continue;
|
||||
} catch (ElasticsearchParseException parseException) {
|
||||
// date fields throw an ElasticsearchParseException with the
|
||||
// underlying IAE as the cause, ignore this field if that is
|
||||
// the case
|
||||
if (parseException.getCause() instanceof IllegalArgumentException) {
|
||||
continue;
|
||||
}
|
||||
throw parseException;
|
||||
}
|
||||
throw parseException;
|
||||
}
|
||||
float boost = ft.boost;
|
||||
while (query instanceof BoostQuery) {
|
||||
BoostQuery bq = (BoostQuery) query;
|
||||
query = bq.getQuery();
|
||||
boost *= bq.getBoost();
|
||||
}
|
||||
if (query.getClass() == TermQuery.class) {
|
||||
terms[i] = ((TermQuery) query).getTerm();
|
||||
blendedBoost[i] = boost;
|
||||
i++;
|
||||
} else {
|
||||
if (boost != 1f) {
|
||||
query = new BoostQuery(query, boost);
|
||||
float boost = ft.boost;
|
||||
while (query instanceof BoostQuery) {
|
||||
BoostQuery bq = (BoostQuery) query;
|
||||
query = bq.getQuery();
|
||||
boost *= bq.getBoost();
|
||||
}
|
||||
if (query.getClass() == TermQuery.class) {
|
||||
terms[i] = ((TermQuery) query).getTerm();
|
||||
blendedBoost[i] = boost;
|
||||
i++;
|
||||
} else {
|
||||
if (boost != 1f) {
|
||||
query = new BoostQuery(query, boost);
|
||||
}
|
||||
queries.add(query);
|
||||
}
|
||||
queries.add(query);
|
||||
}
|
||||
}
|
||||
if (i > 0) {
|
||||
|
@ -317,6 +340,14 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
return queryBuilder.blendTerm(term, fieldType);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
||||
if (queryBuilder == null) {
|
||||
return super.blendTermsQuery(terms, fieldType);
|
||||
}
|
||||
return queryBuilder.blendTerms(terms, fieldType);
|
||||
}
|
||||
|
||||
static final class FieldAndFieldType {
|
||||
final MappedFieldType fieldType;
|
||||
final float boost;
|
||||
|
|
|
@ -28,10 +28,12 @@ import org.apache.lucene.search.BoostQuery;
|
|||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.index.engine.Engine;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
|
@ -55,7 +57,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
|||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
IndexService indexService = createIndex("test");
|
||||
Settings settings = Settings.builder()
|
||||
.put("index.analysis.filter.syns.type","synonym")
|
||||
.putArray("index.analysis.filter.syns.synonyms","quick,fast")
|
||||
.put("index.analysis.analyzer.syns.tokenizer","standard")
|
||||
.put("index.analysis.analyzer.syns.filter","syns").build();
|
||||
IndexService indexService = createIndex("test", settings);
|
||||
MapperService mapperService = indexService.mapperService();
|
||||
String mapping = "{\n" +
|
||||
" \"person\":{\n" +
|
||||
|
@ -63,10 +70,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
|||
" \"name\":{\n" +
|
||||
" \"properties\":{\n" +
|
||||
" \"first\": {\n" +
|
||||
" \"type\":\"text\"\n" +
|
||||
" \"type\":\"text\",\n" +
|
||||
" \"analyzer\":\"syns\"\n" +
|
||||
" }," +
|
||||
" \"last\": {\n" +
|
||||
" \"type\":\"text\"\n" +
|
||||
" \"type\":\"text\",\n" +
|
||||
" \"analyzer\":\"syns\"\n" +
|
||||
" }" +
|
||||
" }" +
|
||||
" }\n" +
|
||||
|
@ -176,4 +185,34 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
|||
assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
|
||||
assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\""));
|
||||
}
|
||||
|
||||
public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
|
||||
QueryShardContext queryShardContext = indexService.newQueryShardContext(
|
||||
randomInt(20), null, () -> { throw new UnsupportedOperationException(); });
|
||||
|
||||
// check that synonym query is used for a single field
|
||||
Query parsedQuery =
|
||||
multiMatchQuery("quick").field("name.first")
|
||||
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
|
||||
Term[] terms = new Term[2];
|
||||
terms[0] = new Term("name.first", "quick");
|
||||
terms[1] = new Term("name.first", "fast");
|
||||
Query expectedQuery = new SynonymQuery(terms);
|
||||
assertThat(parsedQuery, equalTo(expectedQuery));
|
||||
|
||||
// check that blended term query is used for multiple fields
|
||||
parsedQuery =
|
||||
multiMatchQuery("quick").field("name.first").field("name.last")
|
||||
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
|
||||
terms = new Term[4];
|
||||
terms[0] = new Term("name.first", "quick");
|
||||
terms[1] = new Term("name.first", "fast");
|
||||
terms[2] = new Term("name.last", "quick");
|
||||
terms[3] = new Term("name.last", "fast");
|
||||
float[] boosts = new float[4];
|
||||
Arrays.fill(boosts, 1.0f);
|
||||
expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f);
|
||||
assertThat(parsedQuery, equalTo(expectedQuery));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue