Add support for the "high_freq" and "low_freq" parameters of the Common Query
"minimum_should_match" parameter. The "high_freq" parameter is used when the query has only high-frequency terms. Closes #3188
This commit is contained in:
parent
8363fcf281
commit
d41c37fdfa
|
@ -17,7 +17,14 @@ package org.apache.lucene.queries;
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
|
||||
/**
|
||||
|
@ -35,18 +42,97 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
|
|||
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
|
||||
}
|
||||
|
||||
private String minNumShouldMatchSpec;
|
||||
private String lowFreqMinNumShouldMatchSpec;
|
||||
private String highFreqMinNumShouldMatchSpec;
|
||||
|
||||
@Override
|
||||
protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
|
||||
if (minNumShouldMatchSpec == null) {
|
||||
return calcMinimumNumberShouldMatch(lowFreqMinNumShouldMatchSpec, numOptional);
|
||||
}
|
||||
|
||||
protected int calcMinimumNumberShouldMatch(String spec, int numOptional) {
|
||||
if (spec == null) {
|
||||
return 0;
|
||||
}
|
||||
return Queries.calculateMinShouldMatch(numOptional, minNumShouldMatchSpec);
|
||||
return Queries.calculateMinShouldMatch(numOptional, spec);
|
||||
}
|
||||
|
||||
public void setMinimumNumberShouldMatch(String spec) {
|
||||
this.minNumShouldMatchSpec = spec;
|
||||
protected int calcHighFreqMinimumNumberShouldMatch(int numOptional) {
|
||||
return calcMinimumNumberShouldMatch(highFreqMinNumShouldMatchSpec, numOptional);
|
||||
}
|
||||
|
||||
public void setHighFreqMinimumNumberShouldMatch(String spec) {
|
||||
this.highFreqMinNumShouldMatchSpec = spec;
|
||||
}
|
||||
|
||||
public String getHighFreqMinimumNumberShouldMatch() {
|
||||
return highFreqMinNumShouldMatchSpec;
|
||||
}
|
||||
|
||||
public void setLowFreqMinimumNumberShouldMatch(String spec) {
|
||||
this.lowFreqMinNumShouldMatchSpec = spec;
|
||||
}
|
||||
|
||||
public String getLowFreqMinimumNumberShouldMatch() {
|
||||
return lowFreqMinNumShouldMatchSpec;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query buildQuery(final int maxDoc, final TermContext[] contextArray, final Term[] queryTerms) {
|
||||
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
|
||||
BooleanQuery highFreq = new BooleanQuery(disableCoord);
|
||||
highFreq.setBoost(highFreqBoost);
|
||||
lowFreq.setBoost(lowFreqBoost);
|
||||
BooleanQuery query = new BooleanQuery(true);
|
||||
|
||||
for (int i = 0; i < queryTerms.length; i++) {
|
||||
TermContext termContext = contextArray[i];
|
||||
if (termContext == null) {
|
||||
lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
|
||||
} else {
|
||||
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency) || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) {
|
||||
highFreq.add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
|
||||
} else {
|
||||
lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final int numLowFreqClauses = lowFreq.clauses().size(),
|
||||
numHighFreqClauses = highFreq.clauses().size();
|
||||
|
||||
if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
|
||||
int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
|
||||
lowFreq.setMinimumNumberShouldMatch(minMustMatch);
|
||||
}
|
||||
|
||||
if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
|
||||
int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
|
||||
highFreq.setMinimumNumberShouldMatch(minMustMatch);
|
||||
}
|
||||
|
||||
if (lowFreq.clauses().isEmpty()) {
|
||||
/*
|
||||
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
|
||||
* prevent slow queries.
|
||||
* Only if a specific high_freq should_match is not specified.
|
||||
*/
|
||||
if (highFreqMinNumShouldMatchSpec == null && highFreqOccur != Occur.MUST) {
|
||||
for (BooleanClause booleanClause : highFreq) {
|
||||
booleanClause.setOccur(Occur.MUST);
|
||||
}
|
||||
}
|
||||
highFreq.setBoost(getBoost());
|
||||
return highFreq;
|
||||
} else if (highFreq.clauses().isEmpty()) {
|
||||
// only do low freq terms - we don't have high freq terms
|
||||
lowFreq.setBoost(getBoost());
|
||||
return lowFreq;
|
||||
} else {
|
||||
query.add(highFreq, Occur.SHOULD);
|
||||
query.add(lowFreq, Occur.MUST);
|
||||
query.setBoost(getBoost());
|
||||
return query;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,7 +58,9 @@ public class CommonTermsQueryBuilder extends BaseQueryBuilder implements Boostab
|
|||
|
||||
private Float boost = null;
|
||||
|
||||
private String minimumShouldMatch = null;
|
||||
private String lowFreqMinimumShouldMatch = null;
|
||||
|
||||
private String highFreqMinimumShouldMatch = null;
|
||||
|
||||
private Boolean disableCoords = null;
|
||||
|
||||
|
@ -127,11 +129,20 @@ public class CommonTermsQueryBuilder extends BaseQueryBuilder implements Boostab
|
|||
}
|
||||
|
||||
/**
|
||||
* Sets the minimum number of query terms that need to match in order to
|
||||
* Sets the minimum number of high-frequency query terms that need to match in order to
|
||||
* produce a hit when there are no low-frequency terms.
|
||||
*/
|
||||
public CommonTermsQueryBuilder highFreqMinimumShouldMatch(String highFreqMinimumShouldMatch) {
|
||||
this.highFreqMinimumShouldMatch = highFreqMinimumShouldMatch;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the minimum number of low-frequency query terms that need to match in order to
|
||||
* produce a hit.
|
||||
*/
|
||||
public CommonTermsQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
public CommonTermsQueryBuilder lowFreqMinimumShouldMatch(String lowFreqMinimumShouldMatch) {
|
||||
this.lowFreqMinimumShouldMatch = lowFreqMinimumShouldMatch;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -159,8 +170,15 @@ public class CommonTermsQueryBuilder extends BaseQueryBuilder implements Boostab
|
|||
if (cutoffFrequency != null) {
|
||||
builder.field("cutoff_frequency", cutoffFrequency);
|
||||
}
|
||||
if (minimumShouldMatch != null) {
|
||||
builder.field("minimum_should_match", minimumShouldMatch);
|
||||
if (lowFreqMinimumShouldMatch != null || highFreqMinimumShouldMatch != null) {
|
||||
builder.startObject("minimum_should_match");
|
||||
if (lowFreqMinimumShouldMatch != null) {
|
||||
builder.field("low_freq", lowFreqMinimumShouldMatch);
|
||||
}
|
||||
if (highFreqMinimumShouldMatch != null) {
|
||||
builder.field("high_freq", highFreqMinimumShouldMatch);
|
||||
}
|
||||
builder.endObject();
|
||||
}
|
||||
|
||||
builder.endObject();
|
||||
|
|
|
@ -78,7 +78,8 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
Object value = null;
|
||||
float boost = 1.0f;
|
||||
String queryAnalyzer = null;
|
||||
String minimumShouldMatch = null;
|
||||
String lowFreqMinimumShouldMatch = null;
|
||||
String highFreqMinimumShouldMatch = null;
|
||||
boolean disableCoords = DEFAULT_DISABLE_COORDS;
|
||||
Occur highFreqOccur = DEFAULT_HIGH_FREQ_OCCUR;
|
||||
Occur lowFreqOccur = DEFAULT_LOW_FREQ_OCCUR;
|
||||
|
@ -89,6 +90,23 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
|
||||
String innerFieldName = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
innerFieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("low_freq".equals(innerFieldName) || "lowFreq".equals(innerFieldName)) {
|
||||
lowFreqMinimumShouldMatch = parser.text();
|
||||
} else if ("high_freq".equals(innerFieldName) || "highFreq".equals(innerFieldName)) {
|
||||
highFreqMinimumShouldMatch = parser.text();
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext.index(), "[common] query does not support [" + innerFieldName + "] for [" + currentFieldName + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (token.isValue()) {
|
||||
if ("query".equals(currentFieldName)) {
|
||||
value = parser.objectText();
|
||||
|
@ -123,7 +141,7 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
"[common] query requires operator to be either 'and' or 'or', not [" + op + "]");
|
||||
}
|
||||
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
|
||||
minimumShouldMatch = parser.textOrNull();
|
||||
lowFreqMinimumShouldMatch = parser.text();
|
||||
} else if ("cutoff_frequency".equals(currentFieldName)) {
|
||||
maxTermFrequency = parser.floatValue();
|
||||
} else {
|
||||
|
@ -148,12 +166,12 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
}
|
||||
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
|
||||
query.setBoost(boost);
|
||||
return parseQueryString(query, value.toString(), fieldName, parseContext, queryAnalyzer, minimumShouldMatch);
|
||||
return parseQueryString(query, value.toString(), fieldName, parseContext, queryAnalyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
|
||||
}
|
||||
|
||||
|
||||
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
|
||||
String queryAnalyzer, String minimumShouldMatch) throws IOException {
|
||||
String queryAnalyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
|
||||
|
||||
FieldMapper<?> mapper = null;
|
||||
String field;
|
||||
|
@ -199,7 +217,8 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
if (count == 0) {
|
||||
return null;
|
||||
}
|
||||
query.setMinimumNumberShouldMatch(minimumShouldMatch);
|
||||
query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
|
||||
query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
|
||||
return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -166,7 +166,7 @@ public class MatchQueryParser implements QueryParser {
|
|||
if (query instanceof BooleanQuery) {
|
||||
Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
|
||||
} else if (query instanceof ExtendedCommonTermsQuery) {
|
||||
((ExtendedCommonTermsQuery)query).setMinimumNumberShouldMatch(minimumShouldMatch);
|
||||
((ExtendedCommonTermsQuery)query).setLowFreqMinimumNumberShouldMatch(minimumShouldMatch);
|
||||
}
|
||||
query.setBoost(boost);
|
||||
return query;
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.elasticsearch.index.query.MatchQueryBuilder.Type;
|
|||
import org.elasticsearch.rest.RestStatus;
|
||||
import org.elasticsearch.search.facet.FacetBuilders;
|
||||
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -145,6 +146,29 @@ public class SimpleQueryTests extends AbstractSharedClusterTest {
|
|||
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||
|
||||
searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the huge fox").lowFreqMinimumShouldMatch("2")).execute().actionGet();
|
||||
assertThat(searchResponse.getHits().totalHits(), equalTo(1l));
|
||||
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
|
||||
|
||||
searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("3")).execute().actionGet();
|
||||
assertThat(searchResponse.getHits().totalHits(), equalTo(2l));
|
||||
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||
|
||||
searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("4")).execute().actionGet();
|
||||
assertThat(searchResponse.getHits().totalHits(), equalTo(1l));
|
||||
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
|
||||
|
||||
searchResponse = client().prepareSearch().setQuery("{ \"common\" : { \"field1\" : { \"query\" : \"the lazy fox brown\", \"cutoff_frequency\" : 1, \"minimum_should_match\" : { \"high_freq\" : 4 } } } }").execute().actionGet();
|
||||
assertThat(searchResponse.getHits().totalHits(), equalTo(1l));
|
||||
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
|
||||
|
||||
// Default
|
||||
searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the lazy fox brown").cutoffFrequency(1)).execute().actionGet();
|
||||
assertThat(searchResponse.getHits().totalHits(), equalTo(1l));
|
||||
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
|
||||
|
||||
|
||||
searchResponse = client().prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the quick brown").cutoffFrequency(3).analyzer("standard")).execute().actionGet();
|
||||
assertThat(searchResponse.getHits().totalHits(), equalTo(3l));
|
||||
// standard drops "the" since it's a stopword
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.test.unit.index.query;
|
|||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||
import org.apache.lucene.queries.BoostingQuery;
|
||||
import org.apache.lucene.queries.FilterClause;
|
||||
import org.apache.lucene.queries.TermsFilter;
|
||||
|
@ -2191,4 +2192,37 @@ public class SimpleIndexQueryParserTests {
|
|||
ConstantScoreQuery csq = (ConstantScoreQuery) parsedQuery;
|
||||
assertThat(csq.getFilter(), instanceOf(IntersectsPrefixTreeFilter.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommonTermsQuery1() throws IOException {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/commonTerms-query1.json");
|
||||
Query parsedQuery = queryParser.parse(query).query();
|
||||
assertThat(parsedQuery, instanceOf(ExtendedCommonTermsQuery.class));
|
||||
ExtendedCommonTermsQuery ectQuery = (ExtendedCommonTermsQuery) parsedQuery;
|
||||
assertThat(ectQuery.getHighFreqMinimumNumberShouldMatch(), nullValue());
|
||||
assertThat(ectQuery.getLowFreqMinimumNumberShouldMatch(), equalTo("2"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommonTermsQuery2() throws IOException {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/commonTerms-query2.json");
|
||||
Query parsedQuery = queryParser.parse(query).query();
|
||||
assertThat(parsedQuery, instanceOf(ExtendedCommonTermsQuery.class));
|
||||
ExtendedCommonTermsQuery ectQuery = (ExtendedCommonTermsQuery) parsedQuery;
|
||||
assertThat(ectQuery.getHighFreqMinimumNumberShouldMatch(), equalTo("50%"));
|
||||
assertThat(ectQuery.getLowFreqMinimumNumberShouldMatch(), equalTo("5<20%"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommonTermsQuery3() throws IOException {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/commonTerms-query3.json");
|
||||
Query parsedQuery = queryParser.parse(query).query();
|
||||
assertThat(parsedQuery, instanceOf(ExtendedCommonTermsQuery.class));
|
||||
ExtendedCommonTermsQuery ectQuery = (ExtendedCommonTermsQuery) parsedQuery;
|
||||
assertThat(ectQuery.getHighFreqMinimumNumberShouldMatch(), nullValue());
|
||||
assertThat(ectQuery.getLowFreqMinimumNumberShouldMatch(), equalTo("2"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"common" : {
|
||||
"dogs" : {
|
||||
"query" : "buck mia tom",
|
||||
"cutoff_frequency" : 1,
|
||||
"minimum_should_match" : {
|
||||
"low_freq" : 2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"common" : {
|
||||
"dogs" : {
|
||||
"query" : "buck mia tom",
|
||||
"minimum_should_match" : {
|
||||
"high_freq" : "50%",
|
||||
"low_freq" : "5<20%"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"common" : {
|
||||
"dogs" : {
|
||||
"query" : "buck mia tom",
|
||||
"cutoff_frequency" : 1,
|
||||
"minimum_should_match" : 2
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue