Additional Graph Support in Match Query (#22503)

Make match queries that use phrase prefix or cutoff frequency options
graph aware.

Closes #22490
This commit is contained in:
Matt Weber 2017-01-10 05:41:03 -08:00 committed by Michael McCandless
parent 9aba49c571
commit 28273e0a52
2 changed files with 97 additions and 10 deletions

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.GraphQuery;
import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
@ -48,6 +49,7 @@ import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.query.support.QueryParsers;
import java.io.IOException; import java.io.IOException;
import java.util.List;
public class MatchQuery { public class MatchQuery {
@ -316,6 +318,21 @@ public class MatchQuery {
/**
 * Builds the query used for a phrase-prefix match on {@code field}.
 * <p>
 * The text is first turned into a query by {@code createFieldQuery} (using the configured analyzer,
 * {@code Occur.MUST} and the given slop); the result is then passed through {@code toMultiPhrasePrefix}.
 * When the analyzed query is a {@link GraphQuery}, each of its inner queries is converted on its own and
 * the converted queries are wrapped in a new {@link GraphQuery}.
 *
 * @param field         name of the field to query
 * @param queryText     text to analyze
 * @param phraseSlop    slop handed to both {@code createFieldQuery} and {@code toMultiPhrasePrefix}
 * @param maxExpansions expansion limit handed to {@code toMultiPhrasePrefix}
 * @return the converted query, or a {@link GraphQuery} holding the converted inner queries
 */
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) { public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop); final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
if (query instanceof GraphQuery) {
// we have a graph query, convert inner queries to multi phrase prefix queries
List<Query> oldQueries = ((GraphQuery) query).getQueries();
// same number of inner queries, same order; only the query at each position is replaced
Query[] queries = new Query[oldQueries.size()];
for (int i = 0; i < queries.length; i++) {
queries[i] = toMultiPhrasePrefix(oldQueries.get(i), phraseSlop, maxExpansions);
}
return new GraphQuery(queries);
}
// not a graph query: convert the single analyzed query directly
return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);
}
private Query toMultiPhrasePrefix(final Query query, int phraseSlop, int maxExpansions) {
float boost = 1; float boost = 1;
Query innerQuery = query; Query innerQuery = query;
while (innerQuery instanceof BoostQuery) { while (innerQuery instanceof BoostQuery) {
@ -357,19 +374,39 @@ public class MatchQuery {
Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur); Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur);
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) booleanQuery; BooleanQuery bq = (BooleanQuery) booleanQuery;
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ( return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
(BooleanQuery) booleanQuery).isCoordDisabled(), fieldType); } else if (booleanQuery != null && booleanQuery instanceof GraphQuery && ((GraphQuery) booleanQuery).hasBoolean()) {
// we have a graph query that has at least one boolean sub-query
// re-build and use extended common terms
List<Query> oldQueries = ((GraphQuery) booleanQuery).getQueries();
Query[] queries = new Query[oldQueries.size()];
for (int i = 0; i < queries.length; i++) {
Query oldQuery = oldQueries.get(i);
if (oldQuery instanceof BooleanQuery) {
queries[i] = boolToExtendedCommonTermsQuery((BooleanQuery) oldQuery, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType);
} else {
queries[i] = oldQuery;
}
}
return new GraphQuery(queries);
}
return booleanQuery;
}
/**
 * Rebuilds a boolean query made of term clauses as an {@code ExtendedCommonTermsQuery}.
 * <p>
 * A new {@code ExtendedCommonTermsQuery} is created from the given occurs, the maximum term frequency,
 * the boolean query's coord setting and the field type, and the term of every clause is added to it.
 * Only {@code TermQuery} clauses are converted: as soon as a clause of any other type is encountered the
 * conversion is abandoned and the boolean query is returned as it was passed in.
 *
 * @param bq               boolean query whose clauses are inspected
 * @param highFreqOccur    occur passed to the {@code ExtendedCommonTermsQuery} constructor
 * @param lowFreqOccur     occur passed to the {@code ExtendedCommonTermsQuery} constructor
 * @param maxTermFrequency frequency cutoff passed to the {@code ExtendedCommonTermsQuery} constructor
 * @param fieldType        field type passed to the {@code ExtendedCommonTermsQuery} constructor
 * @return the populated {@code ExtendedCommonTermsQuery}, or the unconverted boolean query when a
 *         clause is not a {@code TermQuery}
 */
private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, Occur highFreqOccur, Occur lowFreqOccur, float
maxTermFrequency, MappedFieldType fieldType) {
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency,
bq.isCoordDisabled(), fieldType);
for (BooleanClause clause : bq.clauses()) { for (BooleanClause clause : bq.clauses()) {
// only term clauses can be copied over; anything else means the boolean query is kept unconverted
if (!(clause.getQuery() instanceof TermQuery)) { if (!(clause.getQuery() instanceof TermQuery)) {
return booleanQuery; return bq;
} }
query.add(((TermQuery) clause.getQuery()).getTerm()); query.add(((TermQuery) clause.getQuery()).getTerm());
} }
return query; return query;
} }
return booleanQuery;
}
} }
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {

View File

@ -65,7 +65,9 @@ public class MatchQueryIT extends ESIntegTestCase {
assertAcked(builder.addMapping(INDEX, createMapping())); assertAcked(builder.addMapping(INDEX, createMapping()));
ensureGreen(); ensureGreen();
}
private List<IndexRequestBuilder> getDocs() {
List<IndexRequestBuilder> builders = new ArrayList<>(); List<IndexRequestBuilder> builders = new ArrayList<>();
builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo")); builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo"));
builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man")); builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man"));
@ -74,7 +76,7 @@ public class MatchQueryIT extends ESIntegTestCase {
builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three")); builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three"));
builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three")); builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three"));
indexRandom(true, false, builders); return builders;
} }
/** /**
@ -97,6 +99,8 @@ public class MatchQueryIT extends ESIntegTestCase {
} }
public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException { public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// first search using regular synonym field using phrase // first search using regular synonym field using phrase
SearchResponse searchResponse = client().prepareSearch(INDEX) SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get(); .setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get();
@ -115,6 +119,8 @@ public class MatchQueryIT extends ESIntegTestCase {
} }
public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException { public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// first search using regular synonym field using phrase // first search using regular synonym field using phrase
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
.operator(Operator.AND).analyzer("lower_syns")).get(); .operator(Operator.AND).analyzer("lower_syns")).get();
@ -132,6 +138,8 @@ public class MatchQueryIT extends ESIntegTestCase {
} }
public void testMinShouldMatch() throws ExecutionException, InterruptedException { public void testMinShouldMatch() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// no min should match // no min should match
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo") SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns")).get(); .operator(Operator.OR).analyzer("lower_graphsyns")).get();
@ -150,4 +158,46 @@ public class MatchQueryIT extends ESIntegTestCase {
assertHitCount(searchResponse, 3L); assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "6"); assertSearchHits(searchResponse, "1", "2", "6");
} }
/**
 * Runs a phrase-prefix match for "wtf" with the "lower_graphsyns" analyzer against the shared
 * documents plus two extra ones (ids 7 and 8) and expects exactly the documents 1, 2, 3, 7 and 8.
 */
public void testPhrasePrefix() throws ExecutionException, InterruptedException {
    // shared fixture documents, extended with the two documents specific to this test
    final List<IndexRequestBuilder> docs = getDocs();
    docs.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));
    docs.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE"));
    indexRandom(true, false, docs);

    final SearchResponse response = client()
        .prepareSearch(INDEX)
        .setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf").analyzer("lower_graphsyns"))
        .get();

    assertHitCount(response, 5L);
    assertSearchHits(response, "1", "2", "3", "7", "8");
}
/**
 * Compares a plain OR match query with the same query using a cutoff frequency, both analyzed with
 * "lower_graphsyns": without the cutoff four documents are expected, with it only document 1.
 */
public void testCommonTerms() throws ExecutionException, InterruptedException {
    final String routing = "commonTermsTest";

    // give every document the same routing value so that they all end up on the same shard
    final List<IndexRequestBuilder> docs = getDocs();
    docs.forEach(doc -> doc.setRouting(routing));
    indexRandom(true, false, docs);

    // baseline: no cutoff frequency, shows which documents the query matches on its own
    final SearchResponse withoutCutoff = client()
        .prepareSearch(INDEX)
        .setRouting(routing)
        .setQuery(QueryBuilders.matchQuery("field", "foo three happened")
            .operator(Operator.OR)
            .analyzer("lower_graphsyns"))
        .get();
    assertHitCount(withoutCutoff, 4L);
    assertSearchHits(withoutCutoff, "1", "2", "5", "6");

    // identical query with a cutoff frequency: fewer documents match,
    // since essentially everything but "happened" gets excluded
    final SearchResponse withCutoff = client()
        .prepareSearch(INDEX)
        .setRouting(routing)
        .setQuery(QueryBuilders.matchQuery("field", "foo three happened")
            .operator(Operator.OR)
            .analyzer("lower_graphsyns")
            .cutoffFrequency(1f))
        .get();
    assertHitCount(withCutoff, 1L);
    assertSearchHits(withCutoff, "1");
}
} }