Additional Graph Support in Match Query (#22503)
Make match queries that use phrase prefix or cutoff frequency options graph aware. Closes #22490
This commit is contained in:
parent
9aba49c571
commit
28273e0a52
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
|
|||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.GraphQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
|
@ -48,6 +49,7 @@ import org.elasticsearch.index.query.QueryShardContext;
|
|||
import org.elasticsearch.index.query.support.QueryParsers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public class MatchQuery {
|
||||
|
||||
|
@ -316,6 +318,21 @@ public class MatchQuery {
|
|||
|
||||
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
|
||||
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
|
||||
if (query instanceof GraphQuery) {
|
||||
// we have a graph query, convert inner queries to multi phrase prefix queries
|
||||
List<Query> oldQueries = ((GraphQuery) query).getQueries();
|
||||
Query[] queries = new Query[oldQueries.size()];
|
||||
for (int i = 0; i < queries.length; i++) {
|
||||
queries[i] = toMultiPhrasePrefix(oldQueries.get(i), phraseSlop, maxExpansions);
|
||||
}
|
||||
|
||||
return new GraphQuery(queries);
|
||||
}
|
||||
|
||||
return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);
|
||||
}
|
||||
|
||||
private Query toMultiPhrasePrefix(final Query query, int phraseSlop, int maxExpansions) {
|
||||
float boost = 1;
|
||||
Query innerQuery = query;
|
||||
while (innerQuery instanceof BoostQuery) {
|
||||
|
@ -357,18 +374,38 @@ public class MatchQuery {
|
|||
Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur);
|
||||
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
|
||||
BooleanQuery bq = (BooleanQuery) booleanQuery;
|
||||
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, (
|
||||
(BooleanQuery) booleanQuery).isCoordDisabled(), fieldType);
|
||||
for (BooleanClause clause : bq.clauses()) {
|
||||
if (!(clause.getQuery() instanceof TermQuery)) {
|
||||
return booleanQuery;
|
||||
return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
|
||||
} else if (booleanQuery != null && booleanQuery instanceof GraphQuery && ((GraphQuery) booleanQuery).hasBoolean()) {
|
||||
// we have a graph query that has at least one boolean sub-query
|
||||
// re-build and use extended common terms
|
||||
List<Query> oldQueries = ((GraphQuery) booleanQuery).getQueries();
|
||||
Query[] queries = new Query[oldQueries.size()];
|
||||
for (int i = 0; i < queries.length; i++) {
|
||||
Query oldQuery = oldQueries.get(i);
|
||||
if (oldQuery instanceof BooleanQuery) {
|
||||
queries[i] = boolToExtendedCommonTermsQuery((BooleanQuery) oldQuery, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
|
||||
} else {
|
||||
queries[i] = oldQuery;
|
||||
}
|
||||
query.add(((TermQuery) clause.getQuery()).getTerm());
|
||||
}
|
||||
return query;
|
||||
}
|
||||
return booleanQuery;
|
||||
|
||||
return new GraphQuery(queries);
|
||||
}
|
||||
|
||||
return booleanQuery;
|
||||
}
|
||||
|
||||
private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, Occur highFreqOccur, Occur lowFreqOccur, float
|
||||
maxTermFrequency, MappedFieldType fieldType) {
|
||||
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency,
|
||||
bq.isCoordDisabled(), fieldType);
|
||||
for (BooleanClause clause : bq.clauses()) {
|
||||
if (!(clause.getQuery() instanceof TermQuery)) {
|
||||
return bq;
|
||||
}
|
||||
query.add(((TermQuery) clause.getQuery()).getTerm());
|
||||
}
|
||||
return query;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -65,7 +65,9 @@ public class MatchQueryIT extends ESIntegTestCase {
|
|||
|
||||
assertAcked(builder.addMapping(INDEX, createMapping()));
|
||||
ensureGreen();
|
||||
}
|
||||
|
||||
private List<IndexRequestBuilder> getDocs() {
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||
builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo"));
|
||||
builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man"));
|
||||
|
@ -74,7 +76,7 @@ public class MatchQueryIT extends ESIntegTestCase {
|
|||
builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three"));
|
||||
builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three"));
|
||||
|
||||
indexRandom(true, false, builders);
|
||||
return builders;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -97,6 +99,8 @@ public class MatchQueryIT extends ESIntegTestCase {
|
|||
}
|
||||
|
||||
public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException {
|
||||
indexRandom(true, false, getDocs());
|
||||
|
||||
// first search using regular synonym field using phrase
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX)
|
||||
.setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get();
|
||||
|
@ -115,6 +119,8 @@ public class MatchQueryIT extends ESIntegTestCase {
|
|||
}
|
||||
|
||||
public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException {
|
||||
indexRandom(true, false, getDocs());
|
||||
|
||||
// first search using regular synonym field using phrase
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
|
||||
.operator(Operator.AND).analyzer("lower_syns")).get();
|
||||
|
@ -132,6 +138,8 @@ public class MatchQueryIT extends ESIntegTestCase {
|
|||
}
|
||||
|
||||
public void testMinShouldMatch() throws ExecutionException, InterruptedException {
|
||||
indexRandom(true, false, getDocs());
|
||||
|
||||
// no min should match
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
|
||||
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
|
||||
|
@ -150,4 +158,46 @@ public class MatchQueryIT extends ESIntegTestCase {
|
|||
assertHitCount(searchResponse, 3L);
|
||||
assertSearchHits(searchResponse, "1", "2", "6");
|
||||
}
|
||||
|
||||
public void testPhrasePrefix() throws ExecutionException, InterruptedException {
|
||||
List<IndexRequestBuilder> builders = getDocs();
|
||||
builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));
|
||||
builders.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE"));
|
||||
indexRandom(true, false, builders);
|
||||
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf")
|
||||
.analyzer("lower_graphsyns")).get();
|
||||
|
||||
assertHitCount(searchResponse, 5L);
|
||||
assertSearchHits(searchResponse, "1", "2", "3", "7", "8");
|
||||
}
|
||||
|
||||
public void testCommonTerms() throws ExecutionException, InterruptedException {
|
||||
String route = "commonTermsTest";
|
||||
List<IndexRequestBuilder> builders = getDocs();
|
||||
for (IndexRequestBuilder indexRequet : builders) {
|
||||
// route all docs to same shard for this test
|
||||
indexRequet.setRouting(route);
|
||||
}
|
||||
indexRandom(true, false, builders);
|
||||
|
||||
// do a search with no cutoff frequency to show which docs should match
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX)
|
||||
.setRouting(route)
|
||||
.setQuery(QueryBuilders.matchQuery("field", "foo three happened")
|
||||
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
|
||||
|
||||
assertHitCount(searchResponse, 4L);
|
||||
assertSearchHits(searchResponse, "1", "2", "5", "6");
|
||||
|
||||
// do same search with cutoff and see less documents match
|
||||
// in this case, essentially everything but "happened" gets excluded
|
||||
searchResponse = client().prepareSearch(INDEX)
|
||||
.setRouting(route)
|
||||
.setQuery(QueryBuilders.matchQuery("field", "foo three happened")
|
||||
.operator(Operator.OR).analyzer("lower_graphsyns").cutoffFrequency(1f)).get();
|
||||
|
||||
assertHitCount(searchResponse, 1L);
|
||||
assertSearchHits(searchResponse, "1");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue