Fix synonym phrase query expansion for cross_fields parsing (#28045)
* Fix synonym phrase query expansion for cross_fields parsing The `cross_fields` mode for query parser ignores phrase query generated by multi-word synonyms. In such case only the first field of each analyzer group is kept. This change fixes this issue by expanding the phrase query for each analyzer group to **all** fields using a disjunction max query.
This commit is contained in:
parent
3895add2ca
commit
190f1e1fb3
|
@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.FuzzyQuery;
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.search.MultiPhraseQuery;
|
import org.apache.lucene.search.MultiPhraseQuery;
|
||||||
import org.apache.lucene.search.MultiTermQuery;
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
|
@ -350,7 +351,12 @@ public class MatchQuery {
|
||||||
throw exc;
|
throw exc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return super.analyzePhrase(field, stream, slop);
|
Query query = super.analyzePhrase(field, stream, slop);
|
||||||
|
if (query instanceof PhraseQuery) {
|
||||||
|
// synonyms that expand to multiple terms can return a phrase query.
|
||||||
|
return blendPhraseQuery((PhraseQuery) query, mapper);
|
||||||
|
}
|
||||||
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -472,6 +478,14 @@ public class MatchQuery {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when a phrase query is built with {@link QueryBuilder#analyzePhrase(String, TokenStream, int)}.
|
||||||
|
* Subclass can override this function to blend this query to multiple fields.
|
||||||
|
*/
|
||||||
|
protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) {
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
||||||
return new SynonymQuery(terms);
|
return new SynonymQuery(terms);
|
||||||
}
|
}
|
||||||
|
@ -494,5 +508,4 @@ public class MatchQuery {
|
||||||
}
|
}
|
||||||
return termQuery(fieldType, term.bytes(), lenient);
|
return termQuery(fieldType, term.bytes(), lenient);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,10 +25,10 @@ import org.apache.lucene.queries.BlendedTermQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.ElasticsearchParseException;
|
|
||||||
import org.elasticsearch.common.lucene.search.Queries;
|
import org.elasticsearch.common.lucene.search.Queries;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.query.AbstractQueryBuilder;
|
import org.elasticsearch.index.query.AbstractQueryBuilder;
|
||||||
|
@ -143,6 +143,10 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
public Query termQuery(MappedFieldType fieldType, BytesRef value) {
|
public Query termQuery(MappedFieldType fieldType, BytesRef value) {
|
||||||
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
|
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Query blendPhrase(PhraseQuery query, MappedFieldType type) {
|
||||||
|
return MultiMatchQuery.super.blendPhraseQuery(query, type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final class CrossFieldsQueryBuilder extends QueryBuilder {
|
final class CrossFieldsQueryBuilder extends QueryBuilder {
|
||||||
|
@ -226,6 +230,17 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
*/
|
*/
|
||||||
return blendTerm(new Term(fieldType.name(), value.utf8ToString()), fieldType);
|
return blendTerm(new Term(fieldType.name(), value.utf8ToString()), fieldType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query blendPhrase(PhraseQuery query, MappedFieldType type) {
|
||||||
|
if (blendedFields == null) {
|
||||||
|
return super.blendPhrase(query, type);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* We build phrase queries for multi-word synonyms when {@link QueryBuilder#autoGenerateSynonymsPhraseQuery} is true.
|
||||||
|
*/
|
||||||
|
return MultiMatchQuery.blendPhrase(query, blendedFields);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
|
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
|
||||||
|
@ -288,6 +303,28 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expand a {@link PhraseQuery} to multiple fields that share the same analyzer.
|
||||||
|
* Returns a {@link DisjunctionMaxQuery} with a disjunction for each expanded field.
|
||||||
|
*/
|
||||||
|
static Query blendPhrase(PhraseQuery query, FieldAndFieldType... fields) {
|
||||||
|
List<Query> disjunctions = new ArrayList<>();
|
||||||
|
for (FieldAndFieldType field : fields) {
|
||||||
|
int[] positions = query.getPositions();
|
||||||
|
Term[] terms = query.getTerms();
|
||||||
|
PhraseQuery.Builder builder = new PhraseQuery.Builder();
|
||||||
|
for (int i = 0; i < terms.length; i++) {
|
||||||
|
builder.add(new Term(field.fieldType.name(), terms[i].bytes()), positions[i]);
|
||||||
|
}
|
||||||
|
Query q = builder.build();
|
||||||
|
if (field.boost != AbstractQueryBuilder.DEFAULT_BOOST) {
|
||||||
|
q = new BoostQuery(q, field.boost);
|
||||||
|
}
|
||||||
|
disjunctions.add(q);
|
||||||
|
}
|
||||||
|
return new DisjunctionMaxQuery(disjunctions, 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
|
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
|
||||||
if (queryBuilder == null) {
|
if (queryBuilder == null) {
|
||||||
|
@ -304,6 +341,14 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
return queryBuilder.blendTerms(terms, fieldType);
|
return queryBuilder.blendTerms(terms, fieldType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) {
|
||||||
|
if (queryBuilder == null) {
|
||||||
|
return super.blendPhraseQuery(query, fieldType);
|
||||||
|
}
|
||||||
|
return queryBuilder.blendPhrase(query, fieldType);
|
||||||
|
}
|
||||||
|
|
||||||
static final class FieldAndFieldType {
|
static final class FieldAndFieldType {
|
||||||
final MappedFieldType fieldType;
|
final MappedFieldType fieldType;
|
||||||
final float boost;
|
final float boost;
|
||||||
|
|
|
@ -19,12 +19,16 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.search;
|
package org.elasticsearch.index.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockSynonymAnalyzer;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.BlendedTermQuery;
|
import org.apache.lucene.queries.BlendedTermQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.SynonymQuery;
|
import org.apache.lucene.search.SynonymQuery;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
@ -43,7 +47,11 @@ import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
@ -220,4 +228,45 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
|
||||||
assertThat(parsedQuery, equalTo(expectedQuery));
|
assertThat(parsedQuery, equalTo(expectedQuery));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMultiMatchCrossFieldsWithSynonymsPhrase() throws IOException {
|
||||||
|
QueryShardContext queryShardContext = indexService.newQueryShardContext(
|
||||||
|
randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null);
|
||||||
|
MultiMatchQuery parser = new MultiMatchQuery(queryShardContext);
|
||||||
|
parser.setAnalyzer(new MockSynonymAnalyzer());
|
||||||
|
Map<String, Float> fieldNames = new HashMap<>();
|
||||||
|
fieldNames.put("name.first", 1.0f);
|
||||||
|
fieldNames.put("name.last", 1.0f);
|
||||||
|
Query query = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "guinea pig", null);
|
||||||
|
|
||||||
|
Term[] terms = new Term[2];
|
||||||
|
terms[0] = new Term("name.first", "cavy");
|
||||||
|
terms[1] = new Term("name.last", "cavy");
|
||||||
|
float[] boosts = new float[2];
|
||||||
|
Arrays.fill(boosts, 1.0f);
|
||||||
|
|
||||||
|
List<Query> phraseDisjuncts = new ArrayList<>();
|
||||||
|
phraseDisjuncts.add(
|
||||||
|
new PhraseQuery.Builder()
|
||||||
|
.add(new Term("name.first", "guinea"))
|
||||||
|
.add(new Term("name.first", "pig"))
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
phraseDisjuncts.add(
|
||||||
|
new PhraseQuery.Builder()
|
||||||
|
.add(new Term("name.last", "guinea"))
|
||||||
|
.add(new Term("name.last", "pig"))
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
BooleanQuery expected = new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new DisjunctionMaxQuery(phraseDisjuncts, 0.0f), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f), BooleanClause.Occur.SHOULD)
|
||||||
|
.build(),
|
||||||
|
BooleanClause.Occur.SHOULD
|
||||||
|
)
|
||||||
|
.build();
|
||||||
|
assertEquals(expected, query);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue