Additional Graph Support in Match Query (#22503)
Make match queries that use the phrase prefix or cutoff frequency options graph-aware. Closes #22490
This commit is contained in:
parent
9aba49c571
commit
28273e0a52
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.FuzzyQuery;
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
|
import org.apache.lucene.search.GraphQuery;
|
||||||
import org.apache.lucene.search.MultiPhraseQuery;
|
import org.apache.lucene.search.MultiPhraseQuery;
|
||||||
import org.apache.lucene.search.MultiTermQuery;
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
|
@ -48,6 +49,7 @@ import org.elasticsearch.index.query.QueryShardContext;
|
||||||
import org.elasticsearch.index.query.support.QueryParsers;
|
import org.elasticsearch.index.query.support.QueryParsers;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class MatchQuery {
|
public class MatchQuery {
|
||||||
|
|
||||||
|
@ -316,6 +318,21 @@ public class MatchQuery {
|
||||||
|
|
||||||
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
|
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
|
||||||
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
|
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
|
||||||
|
if (query instanceof GraphQuery) {
|
||||||
|
// we have a graph query; convert its inner queries to multi-phrase prefix queries
|
||||||
|
List<Query> oldQueries = ((GraphQuery) query).getQueries();
|
||||||
|
Query[] queries = new Query[oldQueries.size()];
|
||||||
|
for (int i = 0; i < queries.length; i++) {
|
||||||
|
queries[i] = toMultiPhrasePrefix(oldQueries.get(i), phraseSlop, maxExpansions);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new GraphQuery(queries);
|
||||||
|
}
|
||||||
|
|
||||||
|
return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Query toMultiPhrasePrefix(final Query query, int phraseSlop, int maxExpansions) {
|
||||||
float boost = 1;
|
float boost = 1;
|
||||||
Query innerQuery = query;
|
Query innerQuery = query;
|
||||||
while (innerQuery instanceof BoostQuery) {
|
while (innerQuery instanceof BoostQuery) {
|
||||||
|
@ -357,19 +374,39 @@ public class MatchQuery {
|
||||||
Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur);
|
Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur);
|
||||||
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
|
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
|
||||||
BooleanQuery bq = (BooleanQuery) booleanQuery;
|
BooleanQuery bq = (BooleanQuery) booleanQuery;
|
||||||
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, (
|
return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
|
||||||
(BooleanQuery) booleanQuery).isCoordDisabled(), fieldType);
|
} else if (booleanQuery != null && booleanQuery instanceof GraphQuery && ((GraphQuery) booleanQuery).hasBoolean()) {
|
||||||
|
// we have a graph query that has at least one boolean sub-query
|
||||||
|
// re-build and use extended common terms
|
||||||
|
List<Query> oldQueries = ((GraphQuery) booleanQuery).getQueries();
|
||||||
|
Query[] queries = new Query[oldQueries.size()];
|
||||||
|
for (int i = 0; i < queries.length; i++) {
|
||||||
|
Query oldQuery = oldQueries.get(i);
|
||||||
|
if (oldQuery instanceof BooleanQuery) {
|
||||||
|
queries[i] = boolToExtendedCommonTermsQuery((BooleanQuery) oldQuery, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
|
||||||
|
} else {
|
||||||
|
queries[i] = oldQuery;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new GraphQuery(queries);
|
||||||
|
}
|
||||||
|
|
||||||
|
return booleanQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, Occur highFreqOccur, Occur lowFreqOccur, float
|
||||||
|
maxTermFrequency, MappedFieldType fieldType) {
|
||||||
|
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency,
|
||||||
|
bq.isCoordDisabled(), fieldType);
|
||||||
for (BooleanClause clause : bq.clauses()) {
|
for (BooleanClause clause : bq.clauses()) {
|
||||||
if (!(clause.getQuery() instanceof TermQuery)) {
|
if (!(clause.getQuery() instanceof TermQuery)) {
|
||||||
return booleanQuery;
|
return bq;
|
||||||
}
|
}
|
||||||
query.add(((TermQuery) clause.getQuery()).getTerm());
|
query.add(((TermQuery) clause.getQuery()).getTerm());
|
||||||
}
|
}
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
return booleanQuery;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
|
||||||
|
|
|
@ -65,7 +65,9 @@ public class MatchQueryIT extends ESIntegTestCase {
|
||||||
|
|
||||||
assertAcked(builder.addMapping(INDEX, createMapping()));
|
assertAcked(builder.addMapping(INDEX, createMapping()));
|
||||||
ensureGreen();
|
ensureGreen();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<IndexRequestBuilder> getDocs() {
|
||||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||||
builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo"));
|
builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo"));
|
||||||
builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man"));
|
builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man"));
|
||||||
|
@ -74,7 +76,7 @@ public class MatchQueryIT extends ESIntegTestCase {
|
||||||
builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three"));
|
builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three"));
|
||||||
builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three"));
|
builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three"));
|
||||||
|
|
||||||
indexRandom(true, false, builders);
|
return builders;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,6 +99,8 @@ public class MatchQueryIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException {
|
public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException {
|
||||||
|
indexRandom(true, false, getDocs());
|
||||||
|
|
||||||
// first search using regular synonym field using phrase
|
// first search using regular synonym field using phrase
|
||||||
SearchResponse searchResponse = client().prepareSearch(INDEX)
|
SearchResponse searchResponse = client().prepareSearch(INDEX)
|
||||||
.setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get();
|
.setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get();
|
||||||
|
@ -115,6 +119,8 @@ public class MatchQueryIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException {
|
public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException {
|
||||||
|
indexRandom(true, false, getDocs());
|
||||||
|
|
||||||
// first search using regular synonym field using phrase
|
// first search using regular synonym field using phrase
|
||||||
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
|
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
|
||||||
.operator(Operator.AND).analyzer("lower_syns")).get();
|
.operator(Operator.AND).analyzer("lower_syns")).get();
|
||||||
|
@ -132,6 +138,8 @@ public class MatchQueryIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMinShouldMatch() throws ExecutionException, InterruptedException {
|
public void testMinShouldMatch() throws ExecutionException, InterruptedException {
|
||||||
|
indexRandom(true, false, getDocs());
|
||||||
|
|
||||||
// no min should match
|
// no min should match
|
||||||
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
|
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
|
||||||
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
|
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
|
||||||
|
@ -150,4 +158,46 @@ public class MatchQueryIT extends ESIntegTestCase {
|
||||||
assertHitCount(searchResponse, 3L);
|
assertHitCount(searchResponse, 3L);
|
||||||
assertSearchHits(searchResponse, "1", "2", "6");
|
assertSearchHits(searchResponse, "1", "2", "6");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPhrasePrefix() throws ExecutionException, InterruptedException {
|
||||||
|
List<IndexRequestBuilder> builders = getDocs();
|
||||||
|
builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));
|
||||||
|
builders.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE"));
|
||||||
|
indexRandom(true, false, builders);
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf")
|
||||||
|
.analyzer("lower_graphsyns")).get();
|
||||||
|
|
||||||
|
assertHitCount(searchResponse, 5L);
|
||||||
|
assertSearchHits(searchResponse, "1", "2", "3", "7", "8");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCommonTerms() throws ExecutionException, InterruptedException {
|
||||||
|
String route = "commonTermsTest";
|
||||||
|
List<IndexRequestBuilder> builders = getDocs();
|
||||||
|
for (IndexRequestBuilder indexRequet : builders) {
|
||||||
|
// route all docs to same shard for this test
|
||||||
|
indexRequet.setRouting(route);
|
||||||
|
}
|
||||||
|
indexRandom(true, false, builders);
|
||||||
|
|
||||||
|
// do a search with no cutoff frequency to show which docs should match
|
||||||
|
SearchResponse searchResponse = client().prepareSearch(INDEX)
|
||||||
|
.setRouting(route)
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field", "foo three happened")
|
||||||
|
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
|
||||||
|
|
||||||
|
assertHitCount(searchResponse, 4L);
|
||||||
|
assertSearchHits(searchResponse, "1", "2", "5", "6");
|
||||||
|
|
||||||
|
// do the same search with a cutoff frequency and see fewer documents match
|
||||||
|
// in this case, essentially everything but "happened" gets excluded
|
||||||
|
searchResponse = client().prepareSearch(INDEX)
|
||||||
|
.setRouting(route)
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field", "foo three happened")
|
||||||
|
.operator(Operator.OR).analyzer("lower_graphsyns").cutoffFrequency(1f)).get();
|
||||||
|
|
||||||
|
assertHitCount(searchResponse, 1L);
|
||||||
|
assertSearchHits(searchResponse, "1");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue