Use FieldMapper to create the low-level term queries in CommonTermsQuery

Closes #5258
This commit is contained in:
Simon Willnauer 2014-02-26 18:42:01 +01:00
parent 3839454168
commit fe9de7fba2
4 changed files with 136 additions and 21 deletions

View File

@ -19,8 +19,20 @@
package org.apache.lucene.queries;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.FieldMapper;
import java.io.IOException;
/**
* Extended version of {@link CommonTermsQuery} that allows to pass in a
@ -29,12 +41,11 @@ import org.elasticsearch.common.lucene.search.Queries;
*/
public class ExtendedCommonTermsQuery extends CommonTermsQuery {
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
}
private final FieldMapper<?> mapper;
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord, FieldMapper<?> mapper) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
this.mapper = mapper;
}
private String lowFreqMinNumShouldMatchSpec;
@ -72,4 +83,94 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
/**
 * Returns the minimum-should-match specification stored on this query for
 * the low-frequency clauses.
 *
 * @return the value of the {@code lowFreqMinNumShouldMatchSpec} field
 */
public String getLowFreqMinimumNumberShouldMatchSpec() {
    return this.lowFreqMinNumShouldMatchSpec;
}
// LUCENE-UPGRADE: remove this method if on 4.8
/**
 * Rewrites this query, handling the two trivial cases locally and
 * delegating everything else to the parent implementation.
 * <ul>
 * <li>no terms: an empty {@link BooleanQuery} is returned;</li>
 * <li>exactly one term: a single query built by
 * {@link #newTermQuery(Term, TermContext)} (without a term context) is
 * returned, carrying this query's boost.</li>
 * </ul>
 *
 * @param reader the index reader handed to the parent rewrite
 * @return the rewritten query
 * @throws IOException if the parent rewrite fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    if (this.terms.isEmpty()) {
        return new BooleanQuery();
    }
    if (this.terms.size() != 1) {
        return super.rewrite(reader);
    }
    // Single term: no frequency analysis is needed, build the term query directly.
    final Query singleTermQuery = newTermQuery(this.terms.get(0), null);
    singleTermQuery.setBoost(getBoost());
    return singleTermQuery;
}
// LUCENE-UPGRADE: remove this method if on 4.8
/**
 * Sorts the query terms into a low-frequency and a high-frequency boolean
 * query and combines the two.
 * <p>
 * A term without a {@link TermContext} always goes to the low-frequency
 * group. Otherwise it is high-frequency when its document frequency exceeds
 * {@code maxTermFrequency}, read as an absolute count when it is
 * {@code >= 1} and, in any case, as a fraction of {@code maxDoc}. Every
 * clause is created through {@link #newTermQuery(Term, TermContext)}.
 *
 * @param maxDoc       the document count used for the relative cutoff
 * @param contextArray per-term contexts, parallel to {@code queryTerms};
 *                     an entry may be {@code null}
 * @param queryTerms   the terms to build clauses for
 * @return the high-frequency query alone, the low-frequency query alone, or
 *         a coord-disabled boolean query combining both (high-frequency as
 *         SHOULD, low-frequency as MUST), each carrying this query's boost
 */
@Override
protected Query buildQuery(final int maxDoc,
        final TermContext[] contextArray, final Term[] queryTerms) {
    final BooleanQuery lowFreq = new BooleanQuery(disableCoord);
    lowFreq.setBoost(lowFreqBoost);
    final BooleanQuery highFreq = new BooleanQuery(disableCoord);
    highFreq.setBoost(highFreqBoost);

    for (int idx = 0; idx < queryTerms.length; idx++) {
        final TermContext ctx = contextArray[idx];
        final Term term = queryTerms[idx];
        if (ctx == null) {
            lowFreq.add(newTermQuery(term, null), lowFreqOccur);
            continue;
        }
        final int docFreq = ctx.docFreq();
        final boolean overAbsoluteCutoff = maxTermFrequency >= 1f && docFreq > maxTermFrequency;
        final boolean overRelativeCutoff = docFreq > (int) Math.ceil(maxTermFrequency * (float) maxDoc);
        if (overAbsoluteCutoff || overRelativeCutoff) {
            highFreq.add(newTermQuery(term, ctx), highFreqOccur);
        } else {
            lowFreq.add(newTermQuery(term, ctx), lowFreqOccur);
        }
    }

    final int lowCount = lowFreq.clauses().size();
    final int highCount = highFreq.clauses().size();
    if (lowCount > 0 && lowFreqOccur == Occur.SHOULD) {
        lowFreq.setMinimumNumberShouldMatch(calcLowFreqMinimumNumberShouldMatch(lowCount));
    }
    if (highCount > 0 && highFreqOccur == Occur.SHOULD) {
        highFreq.setMinimumNumberShouldMatch(calcHighFreqMinimumNumberShouldMatch(highCount));
    }

    if (lowCount == 0) {
        // Only high-frequency terms: turn them into a conjunction to prevent
        // slow queries, unless a minimum-should-match is set or they are
        // already required.
        final boolean forceConjunction =
                highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST;
        if (forceConjunction) {
            for (BooleanClause clause : highFreq) {
                clause.setOccur(Occur.MUST);
            }
        }
        highFreq.setBoost(getBoost());
        return highFreq;
    }
    if (highCount == 0) {
        // No high-frequency terms at all: the low-frequency query is the result.
        lowFreq.setBoost(getBoost());
        return lowFreq;
    }

    // Both groups are present: low-frequency terms are required, while
    // high-frequency terms are optional.
    final BooleanQuery combined = new BooleanQuery(true);
    combined.add(highFreq, Occur.SHOULD);
    combined.add(lowFreq, Occur.MUST);
    combined.setBoost(getBoost());
    return combined;
}
// Upgrade reminder: when assertions are enabled, class initialization fails
// with the message below unless Version.LUCENE_47 is on or after
// Lucene.VERSION. Presumably this trips once the bundled Lucene moves past
// 4.7, at which point the methods marked LUCENE-UPGRADE should be removed.
static {
assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Remove obsolete code after upgrade to lucene 4.8";
}
//@Override
// LUCENE-UPGRADE: remove this method if on 4.8
/**
 * Creates the query used for a single term.
 * <p>
 * When a field mapper is configured and it returns a non-null query for the
 * term, that query is returned and {@code context} is not used. Otherwise a
 * plain {@link TermQuery} is built, passing on the term context if one was
 * supplied.
 *
 * @param term    the term to query for
 * @param context the term context, or {@code null} if none is available
 * @return the mapper-provided query, or a {@link TermQuery} fallback
 */
protected Query newTermQuery(Term term, TermContext context) {
    final Query mapped = (mapper == null) ? null : mapper.queryStringTermQuery(term);
    if (mapped != null) {
        return mapped;
    }
    // this should be super.newTermQuery(term, context) once it's available in the super class
    if (context == null) {
        return new TermQuery(term);
    }
    return new TermQuery(term, context);
}
}

View File

@ -166,19 +166,6 @@ public class CommonTermsQueryParser implements QueryParser {
if (value == null) {
throw new QueryParsingException(parseContext.index(), "No text specified for text query");
}
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
commonsQuery.setBoost(boost);
Query query = parseQueryString(commonsQuery, value.toString(), fieldName, parseContext, queryAnalyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
return query;
}
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
String queryAnalyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
FieldMapper<?> mapper = null;
String field;
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
@ -207,6 +194,18 @@ public class CommonTermsQueryParser implements QueryParser {
}
}
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords, mapper);
commonsQuery.setBoost(boost);
Query query = parseQueryString(commonsQuery, value.toString(), field, parseContext, analyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch, smartNameFieldMappers);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
return query;
}
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch, MapperService.SmartNameFieldMappers smartNameFieldMappers) throws IOException {
// Logic similar to QueryParser#getFieldQuery
TokenStream source = analyzer.tokenStream(field, queryString.toString());
int count = 0;

View File

@ -209,7 +209,7 @@ public class MatchQuery {
if (commonTermsCutoff == null) {
query = builder.createBooleanQuery(field, value.toString(), occur);
} else {
query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff);
query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff, mapper);
}
break;
case PHRASE:
@ -276,11 +276,11 @@ public class MatchQuery {
return query;
}
public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, FieldMapper<?> mapper) {
Query booleanQuery = createBooleanQuery(field, queryText, Occur.SHOULD);
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) booleanQuery;
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled());
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled(), mapper);
for (BooleanClause clause : bq.clauses()) {
if (!(clause.getQuery() instanceof TermQuery)) {
return booleanQuery;

View File

@ -252,6 +252,21 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
}
}
/**
 * Runs a common-terms query against the {@code _all} field of an index whose
 * {@code comment} field is mapped with {@code boost=5.0}.
 * <p>
 * Both documents contain "test": document 1 in {@code message}, document 2
 * in the boosted {@code comment} field. The test expects both to match,
 * with document 2 ranked first and scoring strictly higher.
 */
@Test
public void testCommonTermsQueryOnAllField() throws Exception {
    client().admin().indices().prepareCreate("test")
            .addMapping("type1", "message", "type=string", "comment", "type=string,boost=5.0")
            .setSettings(SETTING_NUMBER_OF_SHARDS, 1)
            .get();
    indexRandom(true,
            client().prepareIndex("test", "type1", "1").setSource("message", "test message", "comment", "whatever"),
            client().prepareIndex("test", "type1", "2").setSource("message", "hello world", "comment", "test comment"));

    final SearchResponse response = client().prepareSearch().setQuery(commonTerms("_all", "test")).get();

    assertHitCount(response, 2L);
    assertFirstHit(response, hasId("2"));
    assertSecondHit(response, hasId("1"));

    final float firstScore = response.getHits().getHits()[0].getScore();
    final float secondScore = response.getHits().getHits()[1].getScore();
    assertThat(firstScore, greaterThan(secondScore));
}
@Test
public void testCommonTermsQuery() throws Exception {
client().admin().indices().prepareCreate("test")