Refactors CommonTermsQuery
Refactors CommonTermsQuery analogously to TermQueryBuilder. Still left to do are the tests that compare the builder with the actual Lucene query. Relates to #10217. This PR is against the query-refactoring branch.
This commit is contained in:
parent
7afa37c62e
commit
29d16cd1e8
|
@ -19,18 +19,31 @@
|
|||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* CommonTermsQuery is a query that executes high-frequency terms in an
|
||||
* optional sub-query to prevent slow queries due to "common" terms like
|
||||
* stopwords. This query basically builds 2 queries off the {@link #add(Term)
|
||||
* added} terms where low-frequency terms are added to a required boolean clause
|
||||
* stopwords. This query basically builds 2 queries off the
|
||||
* {@link org.apache.lucene.queries.CommonTermsQuery#add(Term) added} terms
|
||||
* where low-frequency terms are added to a required boolean clause
|
||||
* and high-frequency terms are added to an optional boolean clause. The
|
||||
* optional clause is only executed if the required "low-frequency" clause
|
||||
* matches. Scores produced by this query will be slightly different to plain
|
||||
|
@ -46,54 +59,52 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
|
||||
public static final String NAME = "common";
|
||||
|
||||
public static enum Operator {
|
||||
OR, AND
|
||||
}
|
||||
public static final float DEFAULT_CUTOFF_FREQ = 0.01f;
|
||||
|
||||
private final String name;
|
||||
public static final Operator DEFAULT_HIGH_FREQ_OCCUR = Operator.OR;
|
||||
|
||||
public static final Operator DEFAULT_LOW_FREQ_OCCUR = Operator.OR;
|
||||
|
||||
public static final boolean DEFAULT_DISABLE_COORD = true;
|
||||
|
||||
private final String fieldName;
|
||||
|
||||
private final Object text;
|
||||
|
||||
private Operator highFreqOperator = null;
|
||||
private Operator highFreqOperator = DEFAULT_HIGH_FREQ_OCCUR;
|
||||
|
||||
private Operator lowFreqOperator = null;
|
||||
private Operator lowFreqOperator = DEFAULT_LOW_FREQ_OCCUR;
|
||||
|
||||
private String analyzer = null;
|
||||
|
||||
private Float boost = null;
|
||||
private float boost = 1.0f;
|
||||
|
||||
private String lowFreqMinimumShouldMatch = null;
|
||||
|
||||
private String highFreqMinimumShouldMatch = null;
|
||||
|
||||
private Boolean disableCoord = null;
|
||||
private boolean disableCoord = DEFAULT_DISABLE_COORD;
|
||||
|
||||
private Float cutoffFrequency = null;
|
||||
private float cutoffFrequency = DEFAULT_CUTOFF_FREQ;
|
||||
|
||||
private String queryName;
|
||||
|
||||
static final CommonTermsQueryBuilder PROTOTYPE = new CommonTermsQueryBuilder();
|
||||
static final CommonTermsQueryBuilder PROTOTYPE = new CommonTermsQueryBuilder(null, null);
|
||||
|
||||
/**
|
||||
* Constructs a new common terms query.
|
||||
*/
|
||||
public CommonTermsQueryBuilder(String name, Object text) {
|
||||
if (name == null) {
|
||||
throw new IllegalArgumentException("Field name must not be null");
|
||||
}
|
||||
if (text == null) {
|
||||
throw new IllegalArgumentException("Query must not be null");
|
||||
}
|
||||
public CommonTermsQueryBuilder(String fieldName, Object text) {
|
||||
this.fieldName = fieldName;
|
||||
this.text = text;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* private constructor used only internally
|
||||
*/
|
||||
private CommonTermsQueryBuilder() {
|
||||
this.text = null;
|
||||
this.name = null;
|
||||
public String fieldName() {
|
||||
return this.fieldName;
|
||||
}
|
||||
|
||||
public Object text() {
|
||||
return this.text;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -102,19 +113,27 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
* <tt>AND</tt>.
|
||||
*/
|
||||
public CommonTermsQueryBuilder highFreqOperator(Operator operator) {
|
||||
this.highFreqOperator = operator;
|
||||
this.highFreqOperator = (operator == null) ? DEFAULT_HIGH_FREQ_OCCUR : operator;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Operator highFreqOperator() {
|
||||
return highFreqOperator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the operator to use for terms with a low document frequency (less
|
||||
* than {@link #cutoffFrequency(float)}). Defaults to <tt>OR</tt>.
|
||||
*/
|
||||
public CommonTermsQueryBuilder lowFreqOperator(Operator operator) {
|
||||
this.lowFreqOperator = operator;
|
||||
this.lowFreqOperator = (operator == null) ? DEFAULT_LOW_FREQ_OCCUR : operator;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Operator lowFreqOperator() {
|
||||
return lowFreqOperator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Explicitly set the analyzer to use. Defaults to use explicit mapping
|
||||
* config for the field, or, if not set, the default search analyzer.
|
||||
|
@ -124,6 +143,10 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
return this;
|
||||
}
|
||||
|
||||
public String analyzer() {
|
||||
return this.analyzer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the boost to apply to the query.
|
||||
*/
|
||||
|
@ -133,18 +156,26 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
return this;
|
||||
}
|
||||
|
||||
public float boost() {
|
||||
return boost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the cutoff document frequency for high / low frequent terms. A value
|
||||
* in [0..1] (or absolute number >=1) representing the maximum threshold of
|
||||
* a term's document frequency to be considered a low-frequency term.
|
||||
* Defaults to
|
||||
* <tt>{@value CommonTermsQueryParser#DEFAULT_MAX_TERM_DOC_FREQ}</tt>
|
||||
* <tt>{@value #DEFAULT_CUTOFF_FREQ}</tt>
|
||||
*/
|
||||
public CommonTermsQueryBuilder cutoffFrequency(float cutoffFrequency) {
|
||||
this.cutoffFrequency = cutoffFrequency;
|
||||
return this;
|
||||
}
|
||||
|
||||
public float cutoffFrequency() {
|
||||
return this.cutoffFrequency;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the minimum number of high-frequency query terms that need to match in order to
|
||||
* produce a hit when there are no low-frequency terms.
|
||||
|
@ -154,6 +185,10 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
return this;
|
||||
}
|
||||
|
||||
public String highFreqMinimumShouldMatch() {
|
||||
return this.highFreqMinimumShouldMatch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the minimum number of low-frequency query terms that need to match in order to
|
||||
* produce a hit.
|
||||
|
@ -163,11 +198,19 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
return this;
|
||||
}
|
||||
|
||||
public String lowFreqMinimumShouldMatch() {
|
||||
return this.lowFreqMinimumShouldMatch;
|
||||
}
|
||||
|
||||
public CommonTermsQueryBuilder disableCoord(boolean disableCoord) {
|
||||
this.disableCoord = disableCoord;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean disableCoord() {
|
||||
return this.disableCoord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the query name for the filter that can be used when searching for matched_filters per hit.
|
||||
*/
|
||||
|
@ -176,30 +219,24 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
return this;
|
||||
}
|
||||
|
||||
public String queryName() {
|
||||
return this.queryName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject(NAME);
|
||||
builder.startObject(name);
|
||||
builder.startObject(fieldName);
|
||||
|
||||
builder.field("query", text);
|
||||
if (disableCoord != null) {
|
||||
builder.field("disable_coord", disableCoord);
|
||||
}
|
||||
if (highFreqOperator != null) {
|
||||
builder.field("high_freq_operator", highFreqOperator.toString());
|
||||
}
|
||||
if (lowFreqOperator != null) {
|
||||
builder.field("low_freq_operator", lowFreqOperator.toString());
|
||||
}
|
||||
if (analyzer != null) {
|
||||
builder.field("analyzer", analyzer);
|
||||
}
|
||||
if (boost != null) {
|
||||
builder.field("boost", boost);
|
||||
}
|
||||
if (cutoffFrequency != null) {
|
||||
builder.field("cutoff_frequency", cutoffFrequency);
|
||||
}
|
||||
if (lowFreqMinimumShouldMatch != null || highFreqMinimumShouldMatch != null) {
|
||||
builder.startObject("minimum_should_match");
|
||||
if (lowFreqMinimumShouldMatch != null) {
|
||||
|
@ -213,7 +250,6 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
if (queryName != null) {
|
||||
builder.field("_name", queryName);
|
||||
}
|
||||
|
||||
builder.endObject();
|
||||
builder.endObject();
|
||||
}
|
||||
|
@ -222,4 +258,135 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder<CommonTermsQue
|
|||
public String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query toQuery(QueryParseContext parseContext) throws QueryParsingException, IOException {
|
||||
String field;
|
||||
MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
|
||||
if (fieldType != null) {
|
||||
field = fieldType.names().indexName();
|
||||
} else {
|
||||
field = fieldName;
|
||||
}
|
||||
|
||||
Analyzer analyzerObj;
|
||||
if (analyzer == null) {
|
||||
if (fieldType != null) {
|
||||
analyzerObj = parseContext.getSearchAnalyzer(fieldType);
|
||||
} else {
|
||||
analyzerObj = parseContext.mapperService().searchAnalyzer();
|
||||
}
|
||||
} else {
|
||||
analyzerObj = parseContext.mapperService().analysisService().analyzer(analyzer);
|
||||
if (analyzerObj == null) {
|
||||
throw new IllegalArgumentException("no analyzer found for [" + analyzer + "]");
|
||||
}
|
||||
}
|
||||
|
||||
Occur highFreqOccur = highFreqOperator.toBooleanClauseOccur();
|
||||
Occur lowFreqOccur = lowFreqOperator.toBooleanClauseOccur();
|
||||
|
||||
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, cutoffFrequency, disableCoord, fieldType);
|
||||
commonsQuery.setBoost(boost);
|
||||
Query query = parseQueryString(commonsQuery, text, field, analyzerObj, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, query);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field, Analyzer analyzer,
|
||||
String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
|
||||
// Logic similar to QueryParser#getFieldQuery
|
||||
int count = 0;
|
||||
try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
|
||||
source.reset();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
while (source.incrementToken()) {
|
||||
// UTF-8
|
||||
builder.copyChars(termAtt);
|
||||
query.add(new Term(field, builder.toBytesRef()));
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) {
|
||||
return null;
|
||||
}
|
||||
query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
|
||||
query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryValidationException validate() {
|
||||
QueryValidationException validationException = null;
|
||||
if (Strings.isEmpty(this.fieldName)) {
|
||||
validationException = QueryValidationException.addValidationError("field name cannot be null or empty.", validationException);
|
||||
}
|
||||
if (this.text == null) {
|
||||
validationException = QueryValidationException.addValidationError("query text cannot be null", validationException);
|
||||
}
|
||||
return validationException;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CommonTermsQueryBuilder readFrom(StreamInput in) throws IOException {
|
||||
CommonTermsQueryBuilder commonTermsQueryBuilder = new CommonTermsQueryBuilder(in.readString(), in.readGenericValue());
|
||||
commonTermsQueryBuilder.highFreqOperator = Operator.readOperatorFrom(in);
|
||||
commonTermsQueryBuilder.lowFreqOperator = Operator.readOperatorFrom(in);
|
||||
commonTermsQueryBuilder.analyzer = in.readOptionalString();
|
||||
commonTermsQueryBuilder.boost = in.readFloat();
|
||||
commonTermsQueryBuilder.lowFreqMinimumShouldMatch = in.readOptionalString();
|
||||
commonTermsQueryBuilder.highFreqMinimumShouldMatch = in.readOptionalString();
|
||||
commonTermsQueryBuilder.disableCoord = in.readBoolean();
|
||||
commonTermsQueryBuilder.cutoffFrequency = in.readFloat();
|
||||
commonTermsQueryBuilder.queryName = in.readOptionalString();
|
||||
return commonTermsQueryBuilder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(this.fieldName);
|
||||
out.writeGenericValue(this.text);
|
||||
highFreqOperator.writeTo(out);
|
||||
lowFreqOperator.writeTo(out);
|
||||
out.writeOptionalString(analyzer);
|
||||
out.writeFloat(boost);
|
||||
out.writeOptionalString(lowFreqMinimumShouldMatch);
|
||||
out.writeOptionalString(highFreqMinimumShouldMatch);
|
||||
out.writeBoolean(disableCoord);
|
||||
out.writeFloat(cutoffFrequency);
|
||||
out.writeOptionalString(queryName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(fieldName, text, highFreqOperator, lowFreqOperator, analyzer, boost,
|
||||
lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch, disableCoord, cutoffFrequency, queryName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null || getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
CommonTermsQueryBuilder other = (CommonTermsQueryBuilder) obj;
|
||||
|
||||
return Objects.equals(fieldName, other.fieldName) &&
|
||||
Objects.equals(text, other.text) &&
|
||||
Objects.equals(highFreqOperator, other.highFreqOperator) &&
|
||||
Objects.equals(lowFreqOperator, other.lowFreqOperator) &&
|
||||
Objects.equals(analyzer, other.analyzer) &&
|
||||
Objects.equals(boost, other.boost) &&
|
||||
Objects.equals(lowFreqMinimumShouldMatch, other.lowFreqMinimumShouldMatch) &&
|
||||
Objects.equals(highFreqMinimumShouldMatch, other.highFreqMinimumShouldMatch) &&
|
||||
Objects.equals(disableCoord, other.disableCoord) &&
|
||||
Objects.equals(cutoffFrequency, other.cutoffFrequency) &&
|
||||
Objects.equals(queryName, other.queryName);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,34 +19,15 @@
|
|||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class CommonTermsQueryParser extends BaseQueryParserTemp {
|
||||
|
||||
static final float DEFAULT_MAX_TERM_DOC_FREQ = 0.01f;
|
||||
|
||||
static final Occur DEFAULT_HIGH_FREQ_OCCUR = Occur.SHOULD;
|
||||
|
||||
static final Occur DEFAULT_LOW_FREQ_OCCUR = Occur.SHOULD;
|
||||
|
||||
static final boolean DEFAULT_DISABLE_COORD = true;
|
||||
|
||||
public class CommonTermsQueryParser extends BaseQueryParser {
|
||||
|
||||
@Inject
|
||||
public CommonTermsQueryParser() {
|
||||
|
@ -58,22 +39,22 @@ public class CommonTermsQueryParser extends BaseQueryParserTemp {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
|
||||
public QueryBuilder fromXContent(QueryParseContext parseContext) throws IOException, QueryParsingException {
|
||||
XContentParser parser = parseContext.parser();
|
||||
XContentParser.Token token = parser.nextToken();
|
||||
if (token != XContentParser.Token.FIELD_NAME) {
|
||||
throw new QueryParsingException(parseContext, "[common] query malformed, no field");
|
||||
}
|
||||
String fieldName = parser.currentName();
|
||||
Object value = null;
|
||||
Object text = null;
|
||||
float boost = 1.0f;
|
||||
String queryAnalyzer = null;
|
||||
String analyzer = null;
|
||||
String lowFreqMinimumShouldMatch = null;
|
||||
String highFreqMinimumShouldMatch = null;
|
||||
boolean disableCoord = DEFAULT_DISABLE_COORD;
|
||||
Occur highFreqOccur = DEFAULT_HIGH_FREQ_OCCUR;
|
||||
Occur lowFreqOccur = DEFAULT_LOW_FREQ_OCCUR;
|
||||
float maxTermFrequency = DEFAULT_MAX_TERM_DOC_FREQ;
|
||||
boolean disableCoord = CommonTermsQueryBuilder.DEFAULT_DISABLE_COORD;
|
||||
Operator highFreqOperator = CommonTermsQueryBuilder.DEFAULT_HIGH_FREQ_OCCUR;
|
||||
Operator lowFreqOperator = CommonTermsQueryBuilder.DEFAULT_LOW_FREQ_OCCUR;
|
||||
float cutoffFrequency = CommonTermsQueryBuilder.DEFAULT_CUTOFF_FREQ;
|
||||
String queryName = null;
|
||||
token = parser.nextToken();
|
||||
if (token == XContentParser.Token.START_OBJECT) {
|
||||
|
@ -103,41 +84,24 @@ public class CommonTermsQueryParser extends BaseQueryParserTemp {
|
|||
}
|
||||
} else if (token.isValue()) {
|
||||
if ("query".equals(currentFieldName)) {
|
||||
value = parser.objectText();
|
||||
text = parser.objectText();
|
||||
} else if ("analyzer".equals(currentFieldName)) {
|
||||
String analyzer = parser.text();
|
||||
analyzer = parser.text();
|
||||
if (parseContext.analysisService().analyzer(analyzer) == null) {
|
||||
throw new QueryParsingException(parseContext, "[common] analyzer [" + parser.text() + "] not found");
|
||||
}
|
||||
queryAnalyzer = analyzer;
|
||||
} else if ("disable_coord".equals(currentFieldName) || "disableCoord".equals(currentFieldName)) {
|
||||
disableCoord = parser.booleanValue();
|
||||
} else if ("boost".equals(currentFieldName)) {
|
||||
boost = parser.floatValue();
|
||||
} else if ("high_freq_operator".equals(currentFieldName) || "highFreqOperator".equals(currentFieldName)) {
|
||||
String op = parser.text();
|
||||
if ("or".equalsIgnoreCase(op)) {
|
||||
highFreqOccur = BooleanClause.Occur.SHOULD;
|
||||
} else if ("and".equalsIgnoreCase(op)) {
|
||||
highFreqOccur = BooleanClause.Occur.MUST;
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext,
|
||||
"[common] query requires operator to be either 'and' or 'or', not [" + op + "]");
|
||||
}
|
||||
highFreqOperator = Operator.fromString(parser.text());
|
||||
} else if ("low_freq_operator".equals(currentFieldName) || "lowFreqOperator".equals(currentFieldName)) {
|
||||
String op = parser.text();
|
||||
if ("or".equalsIgnoreCase(op)) {
|
||||
lowFreqOccur = BooleanClause.Occur.SHOULD;
|
||||
} else if ("and".equalsIgnoreCase(op)) {
|
||||
lowFreqOccur = BooleanClause.Occur.MUST;
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext,
|
||||
"[common] query requires operator to be either 'and' or 'or', not [" + op + "]");
|
||||
}
|
||||
lowFreqOperator = Operator.fromString(parser.text());
|
||||
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
|
||||
lowFreqMinimumShouldMatch = parser.text();
|
||||
} else if ("cutoff_frequency".equals(currentFieldName)) {
|
||||
maxTermFrequency = parser.floatValue();
|
||||
cutoffFrequency = parser.floatValue();
|
||||
} else if ("_name".equals(currentFieldName)) {
|
||||
queryName = parser.text();
|
||||
} else {
|
||||
|
@ -147,7 +111,7 @@ public class CommonTermsQueryParser extends BaseQueryParserTemp {
|
|||
}
|
||||
parser.nextToken();
|
||||
} else {
|
||||
value = parser.objectText();
|
||||
text = parser.objectText();
|
||||
// move to the next token
|
||||
token = parser.nextToken();
|
||||
if (token != XContentParser.Token.END_OBJECT) {
|
||||
|
@ -157,67 +121,20 @@ public class CommonTermsQueryParser extends BaseQueryParserTemp {
|
|||
}
|
||||
}
|
||||
|
||||
if (value == null) {
|
||||
if (text == null) {
|
||||
throw new QueryParsingException(parseContext, "No text specified for text query");
|
||||
}
|
||||
String field;
|
||||
MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
|
||||
if (fieldType != null) {
|
||||
field = fieldType.names().indexName();
|
||||
} else {
|
||||
field = fieldName;
|
||||
}
|
||||
|
||||
Analyzer analyzer = null;
|
||||
if (queryAnalyzer == null) {
|
||||
if (fieldType != null) {
|
||||
analyzer = fieldType.searchAnalyzer();
|
||||
}
|
||||
if (analyzer == null && fieldType != null) {
|
||||
analyzer = parseContext.getSearchAnalyzer(fieldType);
|
||||
}
|
||||
if (analyzer == null) {
|
||||
analyzer = parseContext.mapperService().searchAnalyzer();
|
||||
}
|
||||
} else {
|
||||
analyzer = parseContext.mapperService().analysisService().analyzer(queryAnalyzer);
|
||||
if (analyzer == null) {
|
||||
throw new IllegalArgumentException("No analyzer found for [" + queryAnalyzer + "]");
|
||||
}
|
||||
}
|
||||
|
||||
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord, fieldType);
|
||||
commonsQuery.setBoost(boost);
|
||||
Query query = parseQueryString(commonsQuery, value.toString(), field, parseContext, analyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, query);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
|
||||
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
|
||||
Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
|
||||
// Logic similar to QueryParser#getFieldQuery
|
||||
int count = 0;
|
||||
try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
|
||||
source.reset();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
while (source.incrementToken()) {
|
||||
// UTF-8
|
||||
builder.copyChars(termAtt);
|
||||
query.add(new Term(field, builder.toBytesRef()));
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) {
|
||||
return null;
|
||||
}
|
||||
query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
|
||||
query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
|
||||
return query;
|
||||
CommonTermsQueryBuilder commonTermsQuery = new CommonTermsQueryBuilder(fieldName, text)
|
||||
.lowFreqMinimumShouldMatch(lowFreqMinimumShouldMatch)
|
||||
.highFreqMinimumShouldMatch(highFreqMinimumShouldMatch)
|
||||
.analyzer(analyzer)
|
||||
.highFreqOperator(highFreqOperator)
|
||||
.lowFreqOperator(lowFreqOperator)
|
||||
.disableCoord(disableCoord)
|
||||
.cutoffFrequency(cutoffFrequency)
|
||||
.boost(boost)
|
||||
.queryName(queryName);
|
||||
return commonTermsQuery;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public enum Operator implements Writeable {
|
||||
OR(0), AND(1);
|
||||
|
||||
private final int ordinal;
|
||||
|
||||
private static final Operator PROTOTYPE = OR;
|
||||
|
||||
private Operator(int ordinal) {
|
||||
this.ordinal = ordinal;
|
||||
}
|
||||
|
||||
public BooleanClause.Occur toBooleanClauseOccur() {
|
||||
switch (this) {
|
||||
case OR:
|
||||
return BooleanClause.Occur.SHOULD;
|
||||
case AND:
|
||||
return BooleanClause.Occur.MUST;
|
||||
default:
|
||||
throw Operator.newOperatorException(this.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Operator readFrom(StreamInput in) throws IOException {
|
||||
int ord = in.readVInt();
|
||||
for (Operator operator : Operator.values()) {
|
||||
if (operator.ordinal == ord) {
|
||||
return operator;
|
||||
}
|
||||
}
|
||||
throw new ElasticsearchException("unknown serialized operator [" + ord + "]");
|
||||
}
|
||||
|
||||
public static Operator readOperatorFrom(StreamInput in) throws IOException {
|
||||
return PROTOTYPE.readFrom(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeVInt(this.ordinal);
|
||||
}
|
||||
|
||||
public static Operator fromString(String op) {
|
||||
for (Operator operator : Operator.values()) {
|
||||
if (operator.name().equalsIgnoreCase(op)) {
|
||||
return operator;
|
||||
}
|
||||
}
|
||||
throw Operator.newOperatorException(op);
|
||||
}
|
||||
|
||||
private static IllegalArgumentException newOperatorException(String op) {
|
||||
return new IllegalArgumentException("operator needs to be either " + Lists.newArrayList(Operator.values()) +
|
||||
", but not [" + op + "]");
|
||||
}
|
||||
}
|
|
@ -59,11 +59,11 @@ public abstract class QueryBuilders {
|
|||
/**
|
||||
* Creates a common query for the provided field name and text.
|
||||
*
|
||||
* @param name The field name.
|
||||
* @param fieldName The field name.
|
||||
* @param text The query text (to be analyzed).
|
||||
*/
|
||||
public static CommonTermsQueryBuilder commonTermsQuery(String name, Object text) {
|
||||
return new CommonTermsQueryBuilder(name, text);
|
||||
public static CommonTermsQueryBuilder commonTermsQuery(String fieldName, Object text) {
|
||||
return new CommonTermsQueryBuilder(fieldName, text);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class CommonTermsQueryBuilderTest extends BaseQueryTestCase<CommonTermsQueryBuilder> {
|
||||
|
||||
@Override
|
||||
protected CommonTermsQueryBuilder createTestQueryBuilder() {
|
||||
CommonTermsQueryBuilder query;
|
||||
|
||||
// mapped or unmapped field
|
||||
String text = randomAsciiOfLengthBetween(1, 10);
|
||||
if (randomBoolean()) {
|
||||
query = new CommonTermsQueryBuilder(STRING_FIELD_NAME, text);
|
||||
} else {
|
||||
query = new CommonTermsQueryBuilder(randomAsciiOfLengthBetween(1, 10), text);
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
query.cutoffFrequency((float) randomIntBetween(1, 10));
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
query.lowFreqOperator(randomFrom(Operator.values()));
|
||||
}
|
||||
|
||||
// number of low frequency terms that must match
|
||||
if (randomBoolean()) {
|
||||
query.lowFreqMinimumShouldMatch("" + randomIntBetween(1, 5));
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
query.highFreqOperator(randomFrom(Operator.values()));
|
||||
}
|
||||
|
||||
// number of high frequency terms that must match
|
||||
if (randomBoolean()) {
|
||||
query.highFreqMinimumShouldMatch("" + randomIntBetween(1, 5));
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
query.analyzer(randomFrom("simple", "keyword", "whitespace"));
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
query.disableCoord(randomBoolean());
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
query.boost(2.0f / randomIntBetween(1, 20));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
query.queryName(randomAsciiOfLengthBetween(1, 10));
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query createExpectedQuery(CommonTermsQueryBuilder queryBuilder, QueryParseContext context) throws IOException {
|
||||
String fieldName = queryBuilder.fieldName();
|
||||
Analyzer analyzer = context.mapperService().searchAnalyzer();
|
||||
|
||||
// handle mapped field
|
||||
MappedFieldType fieldType = context.fieldMapper(fieldName);
|
||||
if (fieldType != null) {
|
||||
fieldName = fieldType.names().indexName();
|
||||
analyzer = context.getSearchAnalyzer(fieldType);
|
||||
}
|
||||
|
||||
// handle specified analyzer
|
||||
if (queryBuilder.analyzer() != null) {
|
||||
analyzer = context.analysisService().analyzer(queryBuilder.analyzer());
|
||||
}
|
||||
|
||||
Occur highFreqOccur = queryBuilder.highFreqOperator().toBooleanClauseOccur();
|
||||
Occur lowFreqOccur = queryBuilder.lowFreqOperator().toBooleanClauseOccur();
|
||||
|
||||
ExtendedCommonTermsQuery expectedQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, queryBuilder.cutoffFrequency(),
|
||||
queryBuilder.disableCoord(), fieldType);
|
||||
CommonTermsQueryBuilder.parseQueryString(expectedQuery, queryBuilder.text(), fieldName, analyzer,
|
||||
queryBuilder.lowFreqMinimumShouldMatch(), queryBuilder.highFreqMinimumShouldMatch());
|
||||
|
||||
expectedQuery.setBoost(queryBuilder.boost());
|
||||
return expectedQuery;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertLuceneQuery(CommonTermsQueryBuilder queryBuilder, Query query, QueryParseContext context) {
|
||||
if (queryBuilder.queryName() != null) {
|
||||
Query namedQuery = context.copyNamedFilters().get(queryBuilder.queryName());
|
||||
assertThat(namedQuery, equalTo(query));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidate() {
|
||||
CommonTermsQueryBuilder commonTermsQueryBuilder = new CommonTermsQueryBuilder("", "text");
|
||||
assertThat(commonTermsQueryBuilder.validate().validationErrors().size(), is(1));
|
||||
|
||||
commonTermsQueryBuilder = new CommonTermsQueryBuilder("field", null);
|
||||
assertThat(commonTermsQueryBuilder.validate().validationErrors().size(), is(1));
|
||||
|
||||
commonTermsQueryBuilder = new CommonTermsQueryBuilder("field", "text");
|
||||
assertNull(commonTermsQueryBuilder.validate());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoTermsFromQueryString() throws IOException {
|
||||
CommonTermsQueryBuilder builder = new CommonTermsQueryBuilder(STRING_FIELD_NAME, "");
|
||||
assertNull(builder.toQuery(createContext()));
|
||||
}
|
||||
}
|
|
@ -34,7 +34,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.query.*;
|
||||
import org.elasticsearch.index.query.CommonTermsQueryBuilder.Operator;
|
||||
import org.elasticsearch.index.query.MatchQueryBuilder.Type;
|
||||
import org.elasticsearch.rest.RestStatus;
|
||||
import org.elasticsearch.script.Script;
|
||||
|
|
Loading…
Reference in New Issue