Use FieldMapper to create the low level term queries in CommonTermQuery
Closes #5258
This commit is contained in:
parent
3839454168
commit
fe9de7fba2
|
@ -19,8 +19,20 @@
|
|||
|
||||
package org.apache.lucene.queries;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Extended version of {@link CommonTermsQuery} that allows to pass in a
|
||||
|
@ -29,12 +41,11 @@ import org.elasticsearch.common.lucene.search.Queries;
|
|||
*/
|
||||
public class ExtendedCommonTermsQuery extends CommonTermsQuery {
|
||||
|
||||
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
|
||||
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
|
||||
}
|
||||
private final FieldMapper<?> mapper;
|
||||
|
||||
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
|
||||
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
|
||||
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord, FieldMapper<?> mapper) {
|
||||
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
|
||||
this.mapper = mapper;
|
||||
}
|
||||
|
||||
private String lowFreqMinNumShouldMatchSpec;
|
||||
|
@ -72,4 +83,94 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
|
|||
public String getLowFreqMinimumNumberShouldMatchSpec() {
|
||||
return lowFreqMinNumShouldMatchSpec;
|
||||
}
|
||||
|
||||
// LUCENE-UPGRADE: remove this method if on 4.8
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
if (this.terms.isEmpty()) {
|
||||
return new BooleanQuery();
|
||||
} else if (this.terms.size() == 1) {
|
||||
final Query tq = newTermQuery(this.terms.get(0), null);
|
||||
tq.setBoost(getBoost());
|
||||
return tq;
|
||||
}
|
||||
return super.rewrite(reader);
|
||||
}
|
||||
|
||||
// LUCENE-UPGRADE: remove this method if on 4.8
|
||||
@Override
|
||||
protected Query buildQuery(final int maxDoc,
|
||||
final TermContext[] contextArray, final Term[] queryTerms) {
|
||||
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
|
||||
BooleanQuery highFreq = new BooleanQuery(disableCoord);
|
||||
highFreq.setBoost(highFreqBoost);
|
||||
lowFreq.setBoost(lowFreqBoost);
|
||||
BooleanQuery query = new BooleanQuery(true);
|
||||
for (int i = 0; i < queryTerms.length; i++) {
|
||||
TermContext termContext = contextArray[i];
|
||||
if (termContext == null) {
|
||||
lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
|
||||
} else {
|
||||
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|
||||
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) {
|
||||
highFreq.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
|
||||
} else {
|
||||
lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
final int numLowFreqClauses = lowFreq.clauses().size();
|
||||
final int numHighFreqClauses = highFreq.clauses().size();
|
||||
if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
|
||||
int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
|
||||
lowFreq.setMinimumNumberShouldMatch(minMustMatch);
|
||||
}
|
||||
if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
|
||||
int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
|
||||
highFreq.setMinimumNumberShouldMatch(minMustMatch);
|
||||
}
|
||||
if (lowFreq.clauses().isEmpty()) {
|
||||
/*
|
||||
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
|
||||
* prevent slow queries.
|
||||
*/
|
||||
if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
|
||||
for (BooleanClause booleanClause : highFreq) {
|
||||
booleanClause.setOccur(Occur.MUST);
|
||||
}
|
||||
}
|
||||
highFreq.setBoost(getBoost());
|
||||
return highFreq;
|
||||
} else if (highFreq.clauses().isEmpty()) {
|
||||
// only do low freq terms - we don't have high freq terms
|
||||
lowFreq.setBoost(getBoost());
|
||||
return lowFreq;
|
||||
} else {
|
||||
query.add(highFreq, Occur.SHOULD);
|
||||
query.add(lowFreq, Occur.MUST);
|
||||
query.setBoost(getBoost());
|
||||
return query;
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Remove obsolete code after upgrade to lucene 4.8";
|
||||
}
|
||||
|
||||
//@Override
|
||||
// LUCENE-UPGRADE: remove this method if on 4.8
|
||||
protected Query newTermQuery(Term term, TermContext context) {
|
||||
if (mapper == null) {
|
||||
// this should be super.newTermQuery(term, context) once it's available in the super class
|
||||
return context == null ? new TermQuery(term) : new TermQuery(term, context);
|
||||
}
|
||||
final Query query = mapper.queryStringTermQuery(term);
|
||||
if (query == null) {
|
||||
// this should be super.newTermQuery(term, context) once it's available in the super class
|
||||
return context == null ? new TermQuery(term) : new TermQuery(term, context);
|
||||
} else {
|
||||
return query;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -166,19 +166,6 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
if (value == null) {
|
||||
throw new QueryParsingException(parseContext.index(), "No text specified for text query");
|
||||
}
|
||||
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
|
||||
commonsQuery.setBoost(boost);
|
||||
Query query = parseQueryString(commonsQuery, value.toString(), fieldName, parseContext, queryAnalyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, query);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
|
||||
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
|
||||
String queryAnalyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
|
||||
|
||||
FieldMapper<?> mapper = null;
|
||||
String field;
|
||||
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
|
||||
|
@ -207,6 +194,18 @@ public class CommonTermsQueryParser implements QueryParser {
|
|||
}
|
||||
}
|
||||
|
||||
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords, mapper);
|
||||
commonsQuery.setBoost(boost);
|
||||
Query query = parseQueryString(commonsQuery, value.toString(), field, parseContext, analyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch, smartNameFieldMappers);
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, query);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
|
||||
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
|
||||
Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch, MapperService.SmartNameFieldMappers smartNameFieldMappers) throws IOException {
|
||||
// Logic similar to QueryParser#getFieldQuery
|
||||
TokenStream source = analyzer.tokenStream(field, queryString.toString());
|
||||
int count = 0;
|
||||
|
|
|
@ -209,7 +209,7 @@ public class MatchQuery {
|
|||
if (commonTermsCutoff == null) {
|
||||
query = builder.createBooleanQuery(field, value.toString(), occur);
|
||||
} else {
|
||||
query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff);
|
||||
query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff, mapper);
|
||||
}
|
||||
break;
|
||||
case PHRASE:
|
||||
|
@ -276,11 +276,11 @@ public class MatchQuery {
|
|||
return query;
|
||||
}
|
||||
|
||||
public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
|
||||
public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, FieldMapper<?> mapper) {
|
||||
Query booleanQuery = createBooleanQuery(field, queryText, Occur.SHOULD);
|
||||
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
|
||||
BooleanQuery bq = (BooleanQuery) booleanQuery;
|
||||
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled());
|
||||
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled(), mapper);
|
||||
for (BooleanClause clause : bq.clauses()) {
|
||||
if (!(clause.getQuery() instanceof TermQuery)) {
|
||||
return booleanQuery;
|
||||
|
|
|
@ -252,6 +252,21 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommonTermsQueryOnAllField() throws Exception {
|
||||
client().admin().indices().prepareCreate("test")
|
||||
.addMapping("type1", "message", "type=string", "comment", "type=string,boost=5.0")
|
||||
.setSettings(SETTING_NUMBER_OF_SHARDS, 1).get();
|
||||
indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("message", "test message", "comment", "whatever"),
|
||||
client().prepareIndex("test", "type1", "2").setSource("message", "hello world", "comment", "test comment"));
|
||||
|
||||
SearchResponse searchResponse = client().prepareSearch().setQuery(commonTerms("_all", "test")).get();
|
||||
assertHitCount(searchResponse, 2l);
|
||||
assertFirstHit(searchResponse, hasId("2"));
|
||||
assertSecondHit(searchResponse, hasId("1"));
|
||||
assertThat(searchResponse.getHits().getHits()[0].getScore(), greaterThan(searchResponse.getHits().getHits()[1].getScore()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommonTermsQuery() throws Exception {
|
||||
client().admin().indices().prepareCreate("test")
|
||||
|
|
Loading…
Reference in New Issue