Fix GraphQuery expectation after Lucene upgrade to 6.5 (#23117)

GraphQueries are now generated as simple clauses in BooleanQuery. So for instance a multi terms synonym will generate
 a GraphQuery but only for the side paths, the other part of the query will not be impacted. This means that we cannot apply
 `minimum_should_match` or `cutoff_frequency` on GraphQuery anymore (only ES 5.3 does that because we generate all possible paths if a query has at least one multi terms synonym).
Starting in 5.4 multi terms synonym will now be treated as a single term when `minimum_should_match` is computed and will be ignored when `cutoff_frequency` is set.
Fixes #23102
This commit is contained in:
Jim Ferenczi 2017-02-10 18:20:00 +01:00 committed by GitHub
parent 09c7c5c82f
commit 1ba73d9797
5 changed files with 8 additions and 44 deletions

View File

@ -25,7 +25,6 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.GraphQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
@ -149,18 +148,7 @@ public class Queries {
return applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
} else if (query instanceof ExtendedCommonTermsQuery) {
((ExtendedCommonTermsQuery)query).setLowFreqMinimumNumberShouldMatch(minimumShouldMatch);
} else if (query instanceof GraphQuery && ((GraphQuery) query).hasBoolean()) {
// we have a graph query that has at least one boolean sub-query
// re-build and set minimum should match value on all boolean queries
List<Query> oldQueries = ((GraphQuery) query).getQueries();
Query[] queries = new Query[oldQueries.size()];
for (int i = 0; i < queries.length; i++) {
queries[i] = maybeApplyMinimumShouldMatch(oldQueries.get(i), minimumShouldMatch);
}
return new GraphQuery(queries);
}
return query;
}

View File

@ -19,10 +19,7 @@
package org.elasticsearch.index.query;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.GraphQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
@ -37,7 +34,6 @@ import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MatchQuery.ZeroTermsQuery;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

View File

@ -20,8 +20,6 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.GraphQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
@ -38,7 +36,6 @@ import org.elasticsearch.index.query.SimpleQueryParser.Settings;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

View File

@ -375,23 +375,7 @@ public class MatchQuery {
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) booleanQuery;
return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
} else if (booleanQuery != null && booleanQuery instanceof GraphQuery && ((GraphQuery) booleanQuery).hasBoolean()) {
// we have a graph query that has at least one boolean sub-query
// re-build and use extended common terms
List<Query> oldQueries = ((GraphQuery) booleanQuery).getQueries();
Query[] queries = new Query[oldQueries.size()];
for (int i = 0; i < queries.length; i++) {
Query oldQuery = oldQueries.get(i);
if (oldQuery instanceof BooleanQuery) {
queries[i] = boolToExtendedCommonTermsQuery((BooleanQuery) oldQuery, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
} else {
queries[i] = oldQuery;
}
}
return new GraphQuery(queries);
}
return booleanQuery;
}

View File

@ -19,10 +19,6 @@
package org.elasticsearch.index.search;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
@ -39,6 +35,10 @@ import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
public class MatchQueryIT extends ESIntegTestCase {
private static final String INDEX = "test";
@ -172,7 +172,6 @@ public class MatchQueryIT extends ESIntegTestCase {
assertSearchHits(searchResponse, "1", "2", "3", "7", "8");
}
@AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/23102")
public void testCommonTerms() throws ExecutionException, InterruptedException {
String route = "commonTermsTest";
List<IndexRequestBuilder> builders = getDocs();
@ -185,8 +184,8 @@ public class MatchQueryIT extends ESIntegTestCase {
// do a search with no cutoff frequency to show which docs should match
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setRouting(route)
.setQuery(QueryBuilders.matchQuery("field", "foo three happened")
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
.setQuery(QueryBuilders.matchQuery("field", "bar three happened")
.operator(Operator.OR)).get();
assertHitCount(searchResponse, 4L);
assertSearchHits(searchResponse, "1", "2", "5", "6");
@ -195,8 +194,8 @@ public class MatchQueryIT extends ESIntegTestCase {
// in this case, essentially everything but "happened" gets excluded
searchResponse = client().prepareSearch(INDEX)
.setRouting(route)
.setQuery(QueryBuilders.matchQuery("field", "foo three happened")
.operator(Operator.OR).analyzer("lower_graphsyns").cutoffFrequency(1f)).get();
.setQuery(QueryBuilders.matchQuery("field", "bar three happened")
.operator(Operator.OR).cutoffFrequency(1f)).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");