diff --git a/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java b/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java index e45aa93fd5e..c045f49a3e5 100644 --- a/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java +++ b/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java @@ -17,7 +17,14 @@ package org.apache.lucene.queries; * specific language governing permissions and limitations * under the License. */ +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + import org.elasticsearch.common.lucene.search.Queries; /** @@ -35,18 +42,97 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery { super(highFreqOccur, lowFreqOccur, maxTermFrequency); } - private String minNumShouldMatchSpec; + private String lowFreqMinNumShouldMatchSpec; + private String highFreqMinNumShouldMatchSpec; @Override protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) { - if (minNumShouldMatchSpec == null) { - return 0; - } - return Queries.calculateMinShouldMatch(numOptional, minNumShouldMatchSpec); - } - - public void setMinimumNumberShouldMatch(String spec) { - this.minNumShouldMatchSpec = spec; + return calcMinimumNumberShouldMatch(lowFreqMinNumShouldMatchSpec, numOptional); } + protected int calcMinimumNumberShouldMatch(String spec, int numOptional) { + if (spec == null) { + return 0; + } + return Queries.calculateMinShouldMatch(numOptional, spec); + } + + protected int calcHighFreqMinimumNumberShouldMatch(int numOptional) { + return calcMinimumNumberShouldMatch(highFreqMinNumShouldMatchSpec, numOptional); + } + + public void setHighFreqMinimumNumberShouldMatch(String spec) { + this.highFreqMinNumShouldMatchSpec = spec; + } + + public 
String getHighFreqMinimumNumberShouldMatch() { + return highFreqMinNumShouldMatchSpec; + } + + public void setLowFreqMinimumNumberShouldMatch(String spec) { + this.lowFreqMinNumShouldMatchSpec = spec; + } + + public String getLowFreqMinimumNumberShouldMatch() { + return lowFreqMinNumShouldMatchSpec; + } + + @Override + protected Query buildQuery(final int maxDoc, final TermContext[] contextArray, final Term[] queryTerms) { + BooleanQuery lowFreq = new BooleanQuery(disableCoord); + BooleanQuery highFreq = new BooleanQuery(disableCoord); + highFreq.setBoost(highFreqBoost); + lowFreq.setBoost(lowFreqBoost); + BooleanQuery query = new BooleanQuery(true); + + for (int i = 0; i < queryTerms.length; i++) { + TermContext termContext = contextArray[i]; + if (termContext == null) { + lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur); + } else { /* a term is high-frequency when its docFreq exceeds the cutoff: an absolute count if maxTermFrequency >= 1, otherwise that fraction of maxDoc */ + if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency) || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) { + highFreq.add(new TermQuery(queryTerms[i], termContext), highFreqOccur); + } else { + lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur); + } + } + } + + final int numLowFreqClauses = lowFreq.clauses().size(), + numHighFreqClauses = highFreq.clauses().size(); + + if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) { + int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses); + lowFreq.setMinimumNumberShouldMatch(minMustMatch); + } + + if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) { + int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses); + highFreq.setMinimumNumberShouldMatch(minMustMatch); + } + + if (lowFreq.clauses().isEmpty()) { + /* + * if lowFreq is empty we rewrite the high freq terms in a conjunction to + * prevent slow queries. + * Only if a specific high_freq should_match is not specified. 
+ */ + if (highFreqMinNumShouldMatchSpec == null && highFreqOccur != Occur.MUST) { + for (BooleanClause booleanClause : highFreq) { + booleanClause.setOccur(Occur.MUST); + } + } + highFreq.setBoost(getBoost()); + return highFreq; + } else if (highFreq.clauses().isEmpty()) { + // only do low freq terms - we don't have high freq terms + lowFreq.setBoost(getBoost()); + return lowFreq; + } else { + query.add(highFreq, Occur.SHOULD); + query.add(lowFreq, Occur.MUST); + query.setBoost(getBoost()); + return query; + } + } } diff --git a/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java index b972683726b..ff09e9adc97 100644 --- a/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java @@ -58,7 +58,9 @@ public class CommonTermsQueryBuilder extends BaseQueryBuilder implements Boostab private Float boost = null; - private String minimumShouldMatch = null; + private String lowFreqMinimumShouldMatch = null; + + private String highFreqMinimumShouldMatch = null; private Boolean disableCoords = null; @@ -127,11 +129,20 @@ public class CommonTermsQueryBuilder extends BaseQueryBuilder implements Boostab } /** - * Sets the minimum number of query terms that need to match in order to + * Sets the minimum number of high-frequency query terms that need to match in order to + * produce a hit when there are no low-frequency terms. + */ + public CommonTermsQueryBuilder highFreqMinimumShouldMatch(String highFreqMinimumShouldMatch) { + this.highFreqMinimumShouldMatch = highFreqMinimumShouldMatch; + return this; + } + + /** + * Sets the minimum number of low-frequency query terms that need to match in order to * produce a hit. 
*/ - public CommonTermsQueryBuilder minimumShouldMatch(String minimumShouldMatch) { - this.minimumShouldMatch = minimumShouldMatch; + public CommonTermsQueryBuilder lowFreqMinimumShouldMatch(String lowFreqMinimumShouldMatch) { + this.lowFreqMinimumShouldMatch = lowFreqMinimumShouldMatch; return this; } @@ -159,11 +170,18 @@ public class CommonTermsQueryBuilder extends BaseQueryBuilder implements Boostab if (cutoffFrequency != null) { builder.field("cutoff_frequency", cutoffFrequency); } - if (minimumShouldMatch != null) { - builder.field("minimum_should_match", minimumShouldMatch); + if (lowFreqMinimumShouldMatch != null || highFreqMinimumShouldMatch != null) { + builder.startObject("minimum_should_match"); + if (lowFreqMinimumShouldMatch != null) { + builder.field("low_freq", lowFreqMinimumShouldMatch); + } + if (highFreqMinimumShouldMatch != null) { + builder.field("high_freq", highFreqMinimumShouldMatch); + } + builder.endObject(); } builder.endObject(); builder.endObject(); } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/index/query/CommonTermsQueryParser.java b/src/main/java/org/elasticsearch/index/query/CommonTermsQueryParser.java index 621f39aaf9c..77748d68583 100644 --- a/src/main/java/org/elasticsearch/index/query/CommonTermsQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/CommonTermsQueryParser.java @@ -78,7 +78,8 @@ public class CommonTermsQueryParser implements QueryParser { Object value = null; float boost = 1.0f; String queryAnalyzer = null; - String minimumShouldMatch = null; + String lowFreqMinimumShouldMatch = null; + String highFreqMinimumShouldMatch = null; boolean disableCoords = DEFAULT_DISABLE_COORDS; Occur highFreqOccur = DEFAULT_HIGH_FREQ_OCCUR; Occur lowFreqOccur = DEFAULT_LOW_FREQ_OCCUR; @@ -89,6 +90,23 @@ public class CommonTermsQueryParser implements QueryParser { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { 
currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { /* object form: "minimum_should_match" : { "low_freq" : ..., "high_freq" : ... } */ + if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) { + String innerFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + innerFieldName = parser.currentName(); + } else if (token.isValue()) { + if ("low_freq".equals(innerFieldName) || "lowFreq".equals(innerFieldName)) { + lowFreqMinimumShouldMatch = parser.text(); + } else if ("high_freq".equals(innerFieldName) || "highFreq".equals(innerFieldName)) { + highFreqMinimumShouldMatch = parser.text(); + } else { + throw new QueryParsingException(parseContext.index(), "[common] query does not support [" + innerFieldName + "] for [" + currentFieldName + "]"); + } + } + } + } /* NOTE(review): there is no else branch - a START_OBJECT under any other field name is neither consumed nor rejected here; consider throwing QueryParsingException for it */ } else if (token.isValue()) { if ("query".equals(currentFieldName)) { value = parser.objectText(); @@ -123,7 +141,7 @@ public class CommonTermsQueryParser implements QueryParser { "[common] query requires operator to be either 'and' or 'or', not [" + op + "]"); } } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) { - minimumShouldMatch = parser.textOrNull(); + lowFreqMinimumShouldMatch = parser.text(); /* scalar form sets only the low-frequency spec */ } else if ("cutoff_frequency".equals(currentFieldName)) { maxTermFrequency = parser.floatValue(); } else { @@ -148,12 +166,12 @@ public class CommonTermsQueryParser implements QueryParser { } ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords); query.setBoost(boost); - return parseQueryString(query, value.toString(), fieldName, parseContext, queryAnalyzer, minimumShouldMatch); + return parseQueryString(query, value.toString(), fieldName, parseContext, queryAnalyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch); } private final Query parseQueryString(ExtendedCommonTermsQuery 
query, String queryString, String fieldName, QueryParseContext parseContext, - String queryAnalyzer, String minimumShouldMatch) throws IOException { + String queryAnalyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException { FieldMapper mapper = null; String field; @@ -199,7 +217,8 @@ public class CommonTermsQueryParser implements QueryParser { if (count == 0) { return null; } - query.setMinimumNumberShouldMatch(minimumShouldMatch); + query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch); + query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch); return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext); } } diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java index 4cf53028233..54d4b7a750b 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java @@ -166,9 +166,9 @@ public class MatchQueryParser implements QueryParser { if (query instanceof BooleanQuery) { Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch); } else if (query instanceof ExtendedCommonTermsQuery) { - ((ExtendedCommonTermsQuery)query).setMinimumNumberShouldMatch(minimumShouldMatch); + ((ExtendedCommonTermsQuery)query).setLowFreqMinimumNumberShouldMatch(minimumShouldMatch); } query.setBoost(boost); return query; } -} \ No newline at end of file +} diff --git a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java index 0f306cbfe7a..85c7e7802c2 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.index.query.MatchQueryBuilder.Type; 
import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.facet.FacetBuilders; import org.elasticsearch.test.integration.AbstractSharedClusterTest; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.testng.annotations.Test; import java.io.IOException; @@ -145,6 +146,29 @@ public class SimpleQueryTests extends AbstractSharedClusterTest { assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2")); assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3")); + searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the huge fox").lowFreqMinimumShouldMatch("2")).execute().actionGet(); + assertThat(searchResponse.getHits().totalHits(), equalTo(1l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2")); + + searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("3")).execute().actionGet(); + assertThat(searchResponse.getHits().totalHits(), equalTo(2l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2")); + + searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("4")).execute().actionGet(); + assertThat(searchResponse.getHits().totalHits(), equalTo(1l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2")); + + searchResponse = client().prepareSearch().setQuery("{ \"common\" : { \"field1\" : { \"query\" : \"the lazy fox brown\", \"cutoff_frequency\" : 1, \"minimum_should_match\" : { \"high_freq\" : 4 } } } }").execute().actionGet(); + assertThat(searchResponse.getHits().totalHits(), equalTo(1l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2")); + + // Default + searchResponse = 
client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the lazy fox brown").cutoffFrequency(1)).execute().actionGet(); + assertThat(searchResponse.getHits().totalHits(), equalTo(1l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2")); + + searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the quick brown").cutoffFrequency(3).analyzer("standard")).execute().actionGet(); assertThat(searchResponse.getHits().totalHits(), equalTo(3l)); // standard drops "the" since its a stopword diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index bc2761ed9bf..15764c19e50 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -21,6 +21,7 @@ package org.elasticsearch.test.unit.index.query; import com.google.common.collect.Lists; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.queries.BoostingQuery; import org.apache.lucene.queries.FilterClause; import org.apache.lucene.queries.TermsFilter; @@ -2191,4 +2192,37 @@ public class SimpleIndexQueryParserTests { ConstantScoreQuery csq = (ConstantScoreQuery) parsedQuery; assertThat(csq.getFilter(), instanceOf(IntersectsPrefixTreeFilter.class)); } + + @Test + public void testCommonTermsQuery1() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/commonTerms-query1.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(ExtendedCommonTermsQuery.class)); + ExtendedCommonTermsQuery ectQuery = (ExtendedCommonTermsQuery) parsedQuery; + 
assertThat(ectQuery.getHighFreqMinimumNumberShouldMatch(), nullValue()); + assertThat(ectQuery.getLowFreqMinimumNumberShouldMatch(), equalTo("2")); + } + + @Test + public void testCommonTermsQuery2() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/commonTerms-query2.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(ExtendedCommonTermsQuery.class)); + ExtendedCommonTermsQuery ectQuery = (ExtendedCommonTermsQuery) parsedQuery; + assertThat(ectQuery.getHighFreqMinimumNumberShouldMatch(), equalTo("50%")); + assertThat(ectQuery.getLowFreqMinimumNumberShouldMatch(), equalTo("5<20%")); + } + + @Test + public void testCommonTermsQuery3() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/commonTerms-query3.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(ExtendedCommonTermsQuery.class)); + ExtendedCommonTermsQuery ectQuery = (ExtendedCommonTermsQuery) parsedQuery; + assertThat(ectQuery.getHighFreqMinimumNumberShouldMatch(), nullValue()); + assertThat(ectQuery.getLowFreqMinimumNumberShouldMatch(), equalTo("2")); + } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query1.json b/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query1.json new file mode 100644 index 00000000000..b2728dac09d --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query1.json @@ -0,0 +1,11 @@ +{ + "common" : { + "dogs" : { + "query" : "buck mia tom", + "cutoff_frequency" : 1, + "minimum_should_match" : { + "low_freq" : 2 + } + } + } +} diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query2.json 
b/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query2.json new file mode 100644 index 00000000000..aeb281bb759 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query2.json @@ -0,0 +1,11 @@ +{ + "common" : { + "dogs" : { + "query" : "buck mia tom", + "minimum_should_match" : { + "high_freq" : "50%", + "low_freq" : "5<20%" + } + } + } +} diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query3.json b/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query3.json new file mode 100644 index 00000000000..f276209ffc7 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/commonTerms-query3.json @@ -0,0 +1,9 @@ +{ + "common" : { + "dogs" : { + "query" : "buck mia tom", + "cutoff_frequency" : 1, + "minimum_should_match" : 2 + } + } +}