From 28273e0a52c443d5a5d304767fd21373e5a81b0b Mon Sep 17 00:00:00 2001 From: Matt Weber Date: Tue, 10 Jan 2017 05:41:03 -0800 Subject: [PATCH] Additional Graph Support in Match Query (#22503) Make match queries that use phrase prefix or cutoff frequency options graph aware. Closes #22490 --- .../index/search/MatchQuery.java | 55 ++++++++++++++++--- .../index/search/MatchQueryIT.java | 52 +++++++++++++++++- 2 files changed, 97 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 1fbeb81febc..738bfee061f 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.GraphQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; @@ -48,6 +49,7 @@ import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.List; public class MatchQuery { @@ -316,6 +318,21 @@ public class MatchQuery { public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) { final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop); + if (query instanceof GraphQuery) { + // we have a graph query, convert inner queries to multi phrase prefix queries + List oldQueries = ((GraphQuery) query).getQueries(); + Query[] queries = new Query[oldQueries.size()]; + for (int i = 0; i < queries.length; i++) { + queries[i] = toMultiPhrasePrefix(oldQueries.get(i), phraseSlop, maxExpansions); + 
} + + return new GraphQuery(queries); + } + + return toMultiPhrasePrefix(query, phraseSlop, maxExpansions); + } + + private Query toMultiPhrasePrefix(final Query query, int phraseSlop, int maxExpansions) { float boost = 1; Query innerQuery = query; while (innerQuery instanceof BoostQuery) { @@ -357,18 +374,38 @@ public class MatchQuery { Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur); if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) booleanQuery; - ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ( - (BooleanQuery) booleanQuery).isCoordDisabled(), fieldType); - for (BooleanClause clause : bq.clauses()) { - if (!(clause.getQuery() instanceof TermQuery)) { - return booleanQuery; + return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType); + } else if (booleanQuery != null && booleanQuery instanceof GraphQuery && ((GraphQuery) booleanQuery).hasBoolean()) { + // we have a graph query that has at least one boolean sub-query + // re-build and use extended common terms + List oldQueries = ((GraphQuery) booleanQuery).getQueries(); + Query[] queries = new Query[oldQueries.size()]; + for (int i = 0; i < queries.length; i++) { + Query oldQuery = oldQueries.get(i); + if (oldQuery instanceof BooleanQuery) { + queries[i] = boolToExtendedCommonTermsQuery((BooleanQuery) oldQuery, highFreqOccur, lowFreqOccur, maxTermFrequency, fieldType); + } else { + queries[i] = oldQuery; } - query.add(((TermQuery) clause.getQuery()).getTerm()); } - return query; - } - return booleanQuery; + return new GraphQuery(queries); + } + + return booleanQuery; + } + + private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, Occur highFreqOccur, Occur lowFreqOccur, float + maxTermFrequency, MappedFieldType fieldType) { + ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, 
maxTermFrequency, + bq.isCoordDisabled(), fieldType); + for (BooleanClause clause : bq.clauses()) { + if (!(clause.getQuery() instanceof TermQuery)) { + return bq; + } + query.add(((TermQuery) clause.getQuery()).getTerm()); + } + return query; } } diff --git a/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java b/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java index 0cd185bc03a..2381b8bdc38 100644 --- a/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java +++ b/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java @@ -65,7 +65,9 @@ public class MatchQueryIT extends ESIntegTestCase { assertAcked(builder.addMapping(INDEX, createMapping())); ensureGreen(); + } + private List getDocs() { List builders = new ArrayList<>(); builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo")); builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man")); @@ -74,7 +76,7 @@ public class MatchQueryIT extends ESIntegTestCase { builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three")); builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three")); - indexRandom(true, false, builders); + return builders; } /** @@ -97,6 +99,8 @@ public class MatchQueryIT extends ESIntegTestCase { } public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException { + indexRandom(true, false, getDocs()); + // first search using regular synonym field using phrase SearchResponse searchResponse = client().prepareSearch(INDEX) .setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get(); @@ -115,6 +119,8 @@ public class MatchQueryIT extends ESIntegTestCase { } public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException { + indexRandom(true, false, getDocs()); + // first search using regular synonym field using 
phrase SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") .operator(Operator.AND).analyzer("lower_syns")).get(); @@ -132,6 +138,8 @@ public class MatchQueryIT extends ESIntegTestCase { } public void testMinShouldMatch() throws ExecutionException, InterruptedException { + indexRandom(true, false, getDocs()); + // no min should match SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo") .operator(Operator.OR).analyzer("lower_graphsyns")).get(); @@ -150,4 +158,46 @@ public class MatchQueryIT extends ESIntegTestCase { assertHitCount(searchResponse, 3L); assertSearchHits(searchResponse, "1", "2", "6"); } + + public void testPhrasePrefix() throws ExecutionException, InterruptedException { + List builders = getDocs(); + builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!")); + builders.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE")); + indexRandom(true, false, builders); + + SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf") + .analyzer("lower_graphsyns")).get(); + + assertHitCount(searchResponse, 5L); + assertSearchHits(searchResponse, "1", "2", "3", "7", "8"); + } + + public void testCommonTerms() throws ExecutionException, InterruptedException { + String route = "commonTermsTest"; + List builders = getDocs(); + for (IndexRequestBuilder indexRequet : builders) { + // route all docs to same shard for this test + indexRequet.setRouting(route); + } + indexRandom(true, false, builders); + + // do a search with no cutoff frequency to show which docs should match + SearchResponse searchResponse = client().prepareSearch(INDEX) + .setRouting(route) + .setQuery(QueryBuilders.matchQuery("field", "foo three happened") + 
.operator(Operator.OR).analyzer("lower_graphsyns")).get(); + + assertHitCount(searchResponse, 4L); + assertSearchHits(searchResponse, "1", "2", "5", "6"); + + // do same search with cutoff and see fewer documents match + // in this case, essentially everything but "happened" gets excluded + searchResponse = client().prepareSearch(INDEX) + .setRouting(route) + .setQuery(QueryBuilders.matchQuery("field", "foo three happened") + .operator(Operator.OR).analyzer("lower_graphsyns").cutoffFrequency(1f)).get(); + + assertHitCount(searchResponse, 1L); + assertSearchHits(searchResponse, "1"); + } }