Fix cross_fields type on multi_match query with synonyms (#21638)

* Fix cross_fields type on multi_match query with synonyms

This change fixes the cross_fields type of the multi_match query when synonyms are involved.
Since 2.x the Lucene query parser creates SynonymQuery for words that appear at the same position.
For simple term query the CrossFieldsQueryBuilder expands the term to all requested fields and creates a BlendedTermQuery.
This change adds the same mechanism for SynonymQuery which otherwise are not expanded to all requested fields.
As a side note I wonder if we should not replace the BlendedTermQuery with the SynonymQuery. They have the same purpose and behave similarly.

Fixes #21633

* Fallback to SynonymQuery for blended terms on a single field
This commit is contained in:
Jim Ferenczi 2016-11-28 14:14:01 +01:00 committed by GitHub
parent 8390648709
commit 69f35aa07f
3 changed files with 114 additions and 34 deletions

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.QueryBuilder;
import org.elasticsearch.ElasticsearchException;
@ -302,6 +303,11 @@ public class MatchQuery {
return blendTermQuery(term, mapper);
}
@Override
protected Query newSynonymQuery(Term[] terms) {
return blendTermsQuery(terms, mapper);
}
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
float boost = 1;
@ -358,6 +364,10 @@ public class MatchQuery {
}
}
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
return new SynonymQuery(terms);
}
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
if (fuzziness != null) {
if (fieldType != null) {

View File

@ -158,6 +158,10 @@ public class MultiMatchQuery extends MatchQuery {
return MultiMatchQuery.super.blendTermQuery(term, fieldType);
}
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
return MultiMatchQuery.super.blendTermsQuery(terms, fieldType);
}
public Query termQuery(MappedFieldType fieldType, Object value) {
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
}
@ -223,6 +227,18 @@ public class MultiMatchQuery extends MatchQuery {
return queries.isEmpty() ? null : queries;
}
@Override
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
if (blendedFields == null || blendedFields.length == 1) {
return super.blendTerms(terms, fieldType);
}
BytesRef[] values = new BytesRef[terms.length];
for (int i = 0; i < terms.length; i++) {
values[i] = terms[i].bytes();
}
return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields);
}
@Override
public Query blendTerm(Term term, MappedFieldType fieldType) {
if (blendedFields == null) {
@ -243,44 +259,51 @@ public class MultiMatchQuery extends MatchQuery {
}
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
FieldAndFieldType... blendedFields) {
return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields);
}
static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker,
FieldAndFieldType... blendedFields) {
List<Query> queries = new ArrayList<>();
Term[] terms = new Term[blendedFields.length];
float[] blendedBoost = new float[blendedFields.length];
Term[] terms = new Term[blendedFields.length * values.length];
float[] blendedBoost = new float[blendedFields.length * values.length];
int i = 0;
for (FieldAndFieldType ft : blendedFields) {
Query query;
try {
query = ft.fieldType.termQuery(value, context);
} catch (IllegalArgumentException e) {
// the query expects a certain class of values such as numbers
// of ip addresses and the value can't be parsed, so ignore this
// field
continue;
} catch (ElasticsearchParseException parseException) {
// date fields throw an ElasticsearchParseException with the
// underlying IAE as the cause, ignore this field if that is
// the case
if (parseException.getCause() instanceof IllegalArgumentException) {
for (BytesRef term : values) {
Query query;
try {
query = ft.fieldType.termQuery(term, context);
} catch (IllegalArgumentException e) {
// the query expects a certain class of values such as numbers
// of ip addresses and the value can't be parsed, so ignore this
// field
continue;
} catch (ElasticsearchParseException parseException) {
// date fields throw an ElasticsearchParseException with the
// underlying IAE as the cause, ignore this field if that is
// the case
if (parseException.getCause() instanceof IllegalArgumentException) {
continue;
}
throw parseException;
}
throw parseException;
}
float boost = ft.boost;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
query = bq.getQuery();
boost *= bq.getBoost();
}
if (query.getClass() == TermQuery.class) {
terms[i] = ((TermQuery) query).getTerm();
blendedBoost[i] = boost;
i++;
} else {
if (boost != 1f) {
query = new BoostQuery(query, boost);
float boost = ft.boost;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
query = bq.getQuery();
boost *= bq.getBoost();
}
if (query.getClass() == TermQuery.class) {
terms[i] = ((TermQuery) query).getTerm();
blendedBoost[i] = boost;
i++;
} else {
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
queries.add(query);
}
queries.add(query);
}
}
if (i > 0) {
@ -317,6 +340,14 @@ public class MultiMatchQuery extends MatchQuery {
return queryBuilder.blendTerm(term, fieldType);
}
@Override
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
if (queryBuilder == null) {
return super.blendTermsQuery(terms, fieldType);
}
return queryBuilder.blendTerms(terms, fieldType);
}
static final class FieldAndFieldType {
final MappedFieldType fieldType;
final float boost;

View File

@ -28,10 +28,12 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService;
@ -55,7 +57,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
@Before
public void setup() throws IOException {
IndexService indexService = createIndex("test");
Settings settings = Settings.builder()
.put("index.analysis.filter.syns.type","synonym")
.putArray("index.analysis.filter.syns.synonyms","quick,fast")
.put("index.analysis.analyzer.syns.tokenizer","standard")
.put("index.analysis.analyzer.syns.filter","syns").build();
IndexService indexService = createIndex("test", settings);
MapperService mapperService = indexService.mapperService();
String mapping = "{\n" +
" \"person\":{\n" +
@ -63,10 +70,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
" \"name\":{\n" +
" \"properties\":{\n" +
" \"first\": {\n" +
" \"type\":\"text\"\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" }," +
" \"last\": {\n" +
" \"type\":\"text\"\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" }" +
" }" +
" }\n" +
@ -176,4 +185,34 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\""));
}
public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> { throw new UnsupportedOperationException(); });
// check that synonym query is used for a single field
Query parsedQuery =
multiMatchQuery("quick").field("name.first")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
Term[] terms = new Term[2];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
Query expectedQuery = new SynonymQuery(terms);
assertThat(parsedQuery, equalTo(expectedQuery));
// check that blended term query is used for multiple fields
parsedQuery =
multiMatchQuery("quick").field("name.first").field("name.last")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
terms = new Term[4];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
terms[2] = new Term("name.last", "quick");
terms[3] = new Term("name.last", "fast");
float[] boosts = new float[4];
Arrays.fill(boosts, 1.0f);
expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f);
assertThat(parsedQuery, equalTo(expectedQuery));
}
}