Add the ability to ignore or fail on numeric fields when executing more-like-this or fuzzy-like-this queries.

More-like-this and fuzzy-like-this queries expect analyzers which are able to
generate character terms (CharTermAttribute), so unfortunately this doesn't
work with analyzers which generate binary-only terms (BinaryTermAttribute,
the default CharTermAttribute impl being a special BinaryTermAttribute) such as
our analyzers for numeric fields (byte, short, integer, long, float, double but
also date and ip).

To work around this issue, this commit adds a fail_on_unsupported_field
parameter to the more-like-this and fuzzy-like-this parsers. When this parameter
is false, numeric fields will just be ignored and when it is true, an error will
be returned, saying that these queries don't support numeric fields. By default,
this setting is true but the mlt API sets it to false (when no fields are
explicitly specified) in order not to fail on documents which contain numeric fields.

Close #3252
This commit is contained in:
Adrien Grand 2013-07-03 19:55:02 +02:00
parent 1bc8f82d0a
commit ffcc710e4e
19 changed files with 341 additions and 42 deletions

View File

@ -160,7 +160,7 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
GetField getField = getResponse.getField(field); GetField getField = getResponse.getField(field);
if (getField != null) { if (getField != null) {
for (Object value : getField.getValues()) { for (Object value : getField.getValues()) {
addMoreLikeThis(request, boolBuilder, getField.getName(), value.toString()); addMoreLikeThis(request, boolBuilder, getField.getName(), value.toString(), true);
} }
it.remove(); it.remove();
} }
@ -282,7 +282,7 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
} }
if (fields.isEmpty() || fields.contains(field.name())) { if (fields.isEmpty() || fields.contains(field.name())) {
addMoreLikeThis(request, boolBuilder, fieldMapper, field); addMoreLikeThis(request, boolBuilder, fieldMapper, field, !fields.isEmpty());
} }
return false; return false;
@ -302,11 +302,11 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
} }
} }
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Field field) { private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Field field, boolean failOnUnsupportedField) {
addMoreLikeThis(request, boolBuilder, field.name(), fieldMapper.value(convertField(field)).toString()); addMoreLikeThis(request, boolBuilder, field.name(), fieldMapper.value(convertField(field)).toString(), failOnUnsupportedField);
} }
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, String fieldName, String likeText) { private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, String fieldName, String likeText, boolean failOnUnsupportedField) {
MoreLikeThisFieldQueryBuilder mlt = moreLikeThisFieldQuery(fieldName) MoreLikeThisFieldQueryBuilder mlt = moreLikeThisFieldQuery(fieldName)
.likeText(likeText) .likeText(likeText)
.percentTermsToMatch(request.percentTermsToMatch()) .percentTermsToMatch(request.percentTermsToMatch())
@ -317,7 +317,8 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
.maxWordLen(request.maxWordLen()) .maxWordLen(request.maxWordLen())
.minTermFreq(request.minTermFreq()) .minTermFreq(request.minTermFreq())
.maxQueryTerms(request.maxQueryTerms()) .maxQueryTerms(request.maxQueryTerms())
.stopWords(request.stopWords()); .stopWords(request.stopWords())
.failOnUnsupportedField(failOnUnsupportedField);
boolBuilder.should(mlt); boolBuilder.should(mlt);
} }

View File

@ -22,6 +22,9 @@ package org.elasticsearch.index.analysis;
import com.google.common.base.Charsets; import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer;
@ -48,6 +51,7 @@ import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
@ -61,10 +65,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.io.BufferedReader; import java.io.*;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL; import java.net.URL;
import java.util.*; import java.util.*;
@ -275,4 +276,32 @@ public class Analysis {
return reader; return reader;
} }
/**
 * Tells whether the given token stream can expose its terms as characters.
 * <p>Most analyzers produce character terms (CharTermAttribute), but some
 * token streams, such as {@link NumericTokenStream}, only produce binary
 * terms (BinaryTermAttribute, of which CharTermAttribute is a special case).
 * Such streams are unsuitable for highlighting and more-like-this queries,
 * which expect character terms.</p>
 * <p>The check works by trying to add a CharTermAttribute to the stream, so
 * the attribute is registered on <code>tokenStream</code> as a side effect
 * whenever the stream accepts it.</p>
 *
 * @param tokenStream the token stream to probe
 * @return <code>true</code> if a CharTermAttribute could be added,
 *         <code>false</code> if the stream rejected it with an IllegalArgumentException
 */
public static boolean isCharacterTokenStream(TokenStream tokenStream) {
    boolean acceptsCharTerms;
    try {
        tokenStream.addAttribute(CharTermAttribute.class);
        acceptsCharTerms = true;
    } catch (IllegalArgumentException rejected) {
        // The stream refused the character term attribute.
        acceptsCharTerms = false;
    }
    return acceptsCharTerms;
}
/**
 * Tells whether the {@link TokenStream}s that <code>analyzer</code> creates
 * for <code>fieldName</code> provide character terms. The analyzer is asked
 * for a token stream over an empty input and that stream is then probed.
 *
 * @param analyzer  the analyzer to check
 * @param fieldName the field for which the token stream is requested
 * @return the result of {@link #isCharacterTokenStream(TokenStream)} for that stream
 * @throws IOException declared by this method's signature for the token stream creation
 * @see #isCharacterTokenStream(TokenStream)
 */
public static boolean generatesCharacterTokenStream(Analyzer analyzer, String fieldName) throws IOException {
    final StringReader emptyInput = new StringReader("");
    final TokenStream probe = analyzer.tokenStream(fieldName, emptyInput);
    return isCharacterTokenStream(probe);
}
} }

View File

@ -356,7 +356,7 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
private final NumberFieldMapper mapper; private final NumberFieldMapper mapper;
public CustomByteNumericField(NumberFieldMapper mapper, byte number, FieldType fieldType) { public CustomByteNumericField(NumberFieldMapper mapper, byte number, FieldType fieldType) {
super(mapper, mapper.fieldType.stored() ? number : null, fieldType); super(mapper, number, fieldType);
this.mapper = mapper; this.mapper = mapper;
this.number = number; this.number = number;
} }

View File

@ -354,7 +354,7 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
private final NumberFieldMapper mapper; private final NumberFieldMapper mapper;
public CustomDoubleNumericField(NumberFieldMapper mapper, double number, FieldType fieldType) { public CustomDoubleNumericField(NumberFieldMapper mapper, double number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType); super(mapper, number, fieldType);
this.mapper = mapper; this.mapper = mapper;
this.number = number; this.number = number;
} }

View File

@ -350,7 +350,7 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
private final NumberFieldMapper mapper; private final NumberFieldMapper mapper;
public CustomFloatNumericField(NumberFieldMapper mapper, float number, FieldType fieldType) { public CustomFloatNumericField(NumberFieldMapper mapper, float number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType); super(mapper, number, fieldType);
this.mapper = mapper; this.mapper = mapper;
this.number = number; this.number = number;
} }

View File

@ -353,7 +353,7 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
private final NumberFieldMapper mapper; private final NumberFieldMapper mapper;
public CustomIntegerNumericField(NumberFieldMapper mapper, int number, FieldType fieldType) { public CustomIntegerNumericField(NumberFieldMapper mapper, int number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType); super(mapper, number, fieldType);
this.mapper = mapper; this.mapper = mapper;
this.number = number; this.number = number;
} }

View File

@ -353,7 +353,7 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
private final NumberFieldMapper mapper; private final NumberFieldMapper mapper;
public CustomLongNumericField(NumberFieldMapper mapper, long number, FieldType fieldType) { public CustomLongNumericField(NumberFieldMapper mapper, long number, FieldType fieldType) {
super(mapper, mapper.fieldType.stored() ? number : null, fieldType); super(mapper, number, fieldType);
this.mapper = mapper; this.mapper = mapper;
this.number = number; this.number = number;
} }

View File

@ -358,7 +358,7 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
private final NumberFieldMapper mapper; private final NumberFieldMapper mapper;
public CustomShortNumericField(NumberFieldMapper mapper, short number, FieldType fieldType) { public CustomShortNumericField(NumberFieldMapper mapper, short number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType); super(mapper, number, fieldType);
this.mapper = mapper; this.mapper = mapper;
this.number = number; this.number = number;
} }

View File

@ -38,6 +38,7 @@ public class FuzzyLikeThisFieldQueryBuilder extends BaseQueryBuilder implements
private Integer maxQueryTerms; private Integer maxQueryTerms;
private Boolean ignoreTF; private Boolean ignoreTF;
private String analyzer; private String analyzer;
// Defaults to true, matching the default the flt_field parser applies when the
// "fail_on_unsupported_field" parameter is absent; doXContent only writes the
// parameter when it has been switched to false.
private boolean failOnUnsupportedField = true;
/** /**
* A fuzzy more like this query on the provided field. * A fuzzy more like this query on the provided field.
@ -89,6 +90,14 @@ public class FuzzyLikeThisFieldQueryBuilder extends BaseQueryBuilder implements
return this; return this;
} }
/**
 * Sets whether this query should fail, rather than return no result, when it
 * is run against a field which is not supported such as binary/numeric fields.
 *
 * @param fail <code>true</code> to fail on unsupported fields, <code>false</code> to return no result
 * @return this builder, to allow call chaining
 */
public FuzzyLikeThisFieldQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}
@Override @Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException { protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(FuzzyLikeThisFieldQueryParser.NAME); builder.startObject(FuzzyLikeThisFieldQueryParser.NAME);
@ -115,6 +124,9 @@ public class FuzzyLikeThisFieldQueryBuilder extends BaseQueryBuilder implements
if (analyzer != null) { if (analyzer != null) {
builder.field("analyzer", analyzer); builder.field("analyzer", analyzer);
} }
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject(); builder.endObject();
builder.endObject(); builder.endObject();
} }

View File

@ -22,9 +22,11 @@ package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import java.io.IOException; import java.io.IOException;
@ -67,6 +69,7 @@ public class FuzzyLikeThisFieldQueryParser implements QueryParser {
int prefixLength = 0; int prefixLength = 0;
boolean ignoreTF = false; boolean ignoreTF = false;
Analyzer analyzer = null; Analyzer analyzer = null;
boolean failOnUnsupportedField = true;
XContentParser.Token token = parser.nextToken(); XContentParser.Token token = parser.nextToken();
if (token != XContentParser.Token.FIELD_NAME) { if (token != XContentParser.Token.FIELD_NAME) {
@ -100,6 +103,8 @@ public class FuzzyLikeThisFieldQueryParser implements QueryParser {
prefixLength = parser.intValue(); prefixLength = parser.intValue();
} else if ("analyzer".equals(currentFieldName)) { } else if ("analyzer".equals(currentFieldName)) {
analyzer = parseContext.analysisService().analyzer(parser.text()); analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
failOnUnsupportedField = parser.booleanValue();
} else { } else {
throw new QueryParsingException(parseContext.index(), "[flt_field] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext.index(), "[flt_field] query does not support [" + currentFieldName + "]");
} }
@ -122,6 +127,13 @@ public class FuzzyLikeThisFieldQueryParser implements QueryParser {
if (analyzer == null) { if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer(); analyzer = parseContext.mapperService().searchAnalyzer();
} }
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new ElasticSearchIllegalArgumentException("fuzzy_like_this_field doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
return null;
}
}
FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer); FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
query.addTerms(likeText, fieldName, minSimilarity, prefixLength); query.addTerms(likeText, fieldName, minSimilarity, prefixLength);

View File

@ -38,6 +38,7 @@ public class FuzzyLikeThisQueryBuilder extends BaseQueryBuilder implements Boost
private Integer maxQueryTerms; private Integer maxQueryTerms;
private Boolean ignoreTF; private Boolean ignoreTF;
private String analyzer; private String analyzer;
// Defaults to true; doXContent only writes "fail_on_unsupported_field" when
// this has been switched to false.
private boolean failOnUnsupportedField = true;
/** /**
* Constructs a new fuzzy like this query which uses the "_all" field. * Constructs a new fuzzy like this query which uses the "_all" field.
@ -96,6 +97,14 @@ public class FuzzyLikeThisQueryBuilder extends BaseQueryBuilder implements Boost
return this; return this;
} }
/**
 * Sets whether this query should fail, rather than return no result, when it
 * is run against a field which is not supported such as binary/numeric fields.
 *
 * @param fail <code>true</code> to fail on unsupported fields, <code>false</code> to return no result
 * @return this builder, to allow call chaining
 */
public FuzzyLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}
@Override @Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException { protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(FuzzyLikeThisQueryParser.NAME); builder.startObject(FuzzyLikeThisQueryParser.NAME);
@ -128,6 +137,9 @@ public class FuzzyLikeThisQueryBuilder extends BaseQueryBuilder implements Boost
if (analyzer != null) { if (analyzer != null) {
builder.field("analyzer", analyzer); builder.field("analyzer", analyzer);
} }
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject(); builder.endObject();
} }
} }

View File

@ -19,14 +19,18 @@
package org.elasticsearch.index.query; package org.elasticsearch.index.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
@ -66,6 +70,7 @@ public class FuzzyLikeThisQueryParser implements QueryParser {
int prefixLength = 0; int prefixLength = 0;
boolean ignoreTF = false; boolean ignoreTF = false;
Analyzer analyzer = null; Analyzer analyzer = null;
boolean failOnUnsupportedField = true;
XContentParser.Token token; XContentParser.Token token;
String currentFieldName = null; String currentFieldName = null;
@ -87,12 +92,14 @@ public class FuzzyLikeThisQueryParser implements QueryParser {
prefixLength = parser.intValue(); prefixLength = parser.intValue();
} else if ("analyzer".equals(currentFieldName)) { } else if ("analyzer".equals(currentFieldName)) {
analyzer = parseContext.analysisService().analyzer(parser.text()); analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
failOnUnsupportedField = parser.booleanValue();
} else { } else {
throw new QueryParsingException(parseContext.index(), "[flt] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext.index(), "[flt] query does not support [" + currentFieldName + "]");
} }
} else if (token == XContentParser.Token.START_ARRAY) { } else if (token == XContentParser.Token.START_ARRAY) {
if ("fields".equals(currentFieldName)) { if ("fields".equals(currentFieldName)) {
fields = Lists.newArrayList(); fields = Lists.newLinkedList();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parseContext.indexName(parser.text())); fields.add(parseContext.indexName(parser.text()));
} }
@ -112,13 +119,26 @@ public class FuzzyLikeThisQueryParser implements QueryParser {
FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer); FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
if (fields == null) { if (fields == null) {
// add the default _all field fields = Lists.newArrayList(parseContext.defaultField());
query.addTerms(likeText, parseContext.defaultField(), minSimilarity, prefixLength); } else if (fields.isEmpty()) {
} else { throw new QueryParsingException(parseContext.index(), "fuzzy_like_this requires 'fields' to be non-empty");
for (String field : fields) { }
query.addTerms(likeText, field, minSimilarity, prefixLength); for (Iterator<String> it = fields.iterator(); it.hasNext(); ) {
final String fieldName = it.next();
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new ElasticSearchIllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
it.remove();
}
} }
} }
if (fields.isEmpty()) {
return null;
}
for (String field : fields) {
query.addTerms(likeText, field, minSimilarity, prefixLength);
}
query.setBoost(boost); query.setBoost(boost);
query.setIgnoreTF(ignoreTF); query.setIgnoreTF(ignoreTF);

View File

@ -44,6 +44,7 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
private float boostTerms = -1; private float boostTerms = -1;
private float boost = -1; private float boost = -1;
private String analyzer; private String analyzer;
// Defaults to true, matching the default the mlt_field parser applies when the
// "fail_on_unsupported_field" parameter is absent; doXContent only writes the
// parameter when it has been switched to false.
private boolean failOnUnsupportedField = true;
/** /**
* A more like this query that runs against a specific field. * A more like this query that runs against a specific field.
@ -157,6 +158,14 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
return this; return this;
} }
/**
 * Sets whether this query should fail, rather than return no result, when it
 * is run against a field which is not supported such as binary/numeric fields.
 *
 * @param fail <code>true</code> to fail on unsupported fields, <code>false</code> to return no result
 * @return this builder, to allow call chaining
 */
public MoreLikeThisFieldQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}
@Override @Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException { protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(MoreLikeThisFieldQueryParser.NAME); builder.startObject(MoreLikeThisFieldQueryParser.NAME);
@ -202,6 +211,9 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
if (analyzer != null) { if (analyzer != null) {
builder.field("analyzer", analyzer); builder.field("analyzer", analyzer);
} }
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject(); builder.endObject();
builder.endObject(); builder.endObject();
} }

View File

@ -22,10 +22,12 @@ package org.elasticsearch.index.query;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import java.io.IOException; import java.io.IOException;
@ -65,6 +67,7 @@ public class MoreLikeThisFieldQueryParser implements QueryParser {
MoreLikeThisQuery mltQuery = new MoreLikeThisQuery(); MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
mltQuery.setSimilarity(parseContext.searchSimilarity()); mltQuery.setSimilarity(parseContext.searchSimilarity());
Analyzer analyzer = null; Analyzer analyzer = null;
boolean failOnUnsupportedField = true;
String currentFieldName = null; String currentFieldName = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@ -94,6 +97,8 @@ public class MoreLikeThisFieldQueryParser implements QueryParser {
analyzer = parseContext.analysisService().analyzer(parser.text()); analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("boost".equals(currentFieldName)) { } else if ("boost".equals(currentFieldName)) {
mltQuery.setBoost(parser.floatValue()); mltQuery.setBoost(parser.floatValue());
} else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
failOnUnsupportedField = parser.booleanValue();
} else { } else {
throw new QueryParsingException(parseContext.index(), "[mlt_field] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext.index(), "[mlt_field] query does not support [" + currentFieldName + "]");
} }
@ -130,6 +135,13 @@ public class MoreLikeThisFieldQueryParser implements QueryParser {
if (analyzer == null) { if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer(); analyzer = parseContext.mapperService().searchAnalyzer();
} }
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new ElasticSearchIllegalArgumentException("more_like_this_field doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
return null;
}
}
mltQuery.setAnalyzer(analyzer); mltQuery.setAnalyzer(analyzer);
mltQuery.setMoreLikeFields(new String[]{fieldName}); mltQuery.setMoreLikeFields(new String[]{fieldName});
return wrapSmartNameQuery(mltQuery, smartNameFieldMappers, parseContext); return wrapSmartNameQuery(mltQuery, smartNameFieldMappers, parseContext);

View File

@ -45,6 +45,7 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
private float boostTerms = -1; private float boostTerms = -1;
private float boost = -1; private float boost = -1;
private String analyzer; private String analyzer;
private boolean failOnUnsupportedField = true;
/** /**
* Constructs a new more like this query which uses the "_all" field. * Constructs a new more like this query which uses the "_all" field.
@ -165,6 +166,14 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
return this; return this;
} }
/**
 * Sets whether this query should fail, rather than return no result, when it
 * is run against a field which is not supported such as binary/numeric fields.
 *
 * @param fail <code>true</code> to fail on unsupported fields, <code>false</code> to return no result
 * @return this builder, to allow call chaining
 */
public MoreLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}
@Override @Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException { protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(MoreLikeThisQueryParser.NAME); builder.startObject(MoreLikeThisQueryParser.NAME);
@ -216,6 +225,9 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
if (analyzer != null) { if (analyzer != null) {
builder.field("analyzer", analyzer); builder.field("analyzer", analyzer);
} }
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject(); builder.endObject();
} }
} }

View File

@ -23,11 +23,15 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
@ -52,9 +56,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
XContentParser parser = parseContext.parser(); XContentParser parser = parseContext.parser();
MoreLikeThisQuery mltQuery = new MoreLikeThisQuery(); MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
mltQuery.setMoreLikeFields(new String[]{parseContext.defaultField()});
mltQuery.setSimilarity(parseContext.searchSimilarity()); mltQuery.setSimilarity(parseContext.searchSimilarity());
Analyzer analyzer = null; Analyzer analyzer = null;
List<String> moreLikeFields = null;
boolean failOnUnsupportedField = true;
XContentParser.Token token; XContentParser.Token token;
String currentFieldName = null; String currentFieldName = null;
@ -85,6 +90,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
analyzer = parseContext.analysisService().analyzer(parser.text()); analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("boost".equals(currentFieldName)) { } else if ("boost".equals(currentFieldName)) {
mltQuery.setBoost(parser.floatValue()); mltQuery.setBoost(parser.floatValue());
} else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
failOnUnsupportedField = parser.booleanValue();
} else { } else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
} }
@ -96,11 +103,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
} }
mltQuery.setStopWords(stopWords); mltQuery.setStopWords(stopWords);
} else if ("fields".equals(currentFieldName)) { } else if ("fields".equals(currentFieldName)) {
List<String> fields = Lists.newArrayList(); moreLikeFields = Lists.newLinkedList();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parseContext.indexName(parser.text())); moreLikeFields.add(parseContext.indexName(parser.text()));
} }
mltQuery.setMoreLikeFields(fields.toArray(new String[fields.size()]));
} else { } else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
} }
@ -110,15 +116,33 @@ public class MoreLikeThisQueryParser implements QueryParser {
if (mltQuery.getLikeText() == null) { if (mltQuery.getLikeText() == null) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'like_text' to be specified"); throw new QueryParsingException(parseContext.index(), "more_like_this requires 'like_text' to be specified");
} }
if (mltQuery.getMoreLikeFields() == null || mltQuery.getMoreLikeFields().length == 0) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be specified");
}
if (analyzer == null) { if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer(); analyzer = parseContext.mapperService().searchAnalyzer();
} }
mltQuery.setAnalyzer(analyzer); mltQuery.setAnalyzer(analyzer);
if (moreLikeFields == null) {
moreLikeFields = Lists.newArrayList(parseContext.defaultField());
} else if (moreLikeFields.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
}
for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
final String fieldName = it.next();
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new ElasticSearchIllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
it.remove();
}
}
}
if (moreLikeFields.isEmpty()) {
return null;
}
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
return mltQuery; return mltQuery;
} }
} }

View File

@ -0,0 +1,89 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.flt;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
import org.testng.annotations.Test;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyLikeThisFieldQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyLikeThisQuery;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.testng.Assert.fail;
/**
 * Integration tests for fuzzy-like-this queries executed against an index whose
 * documents mix a string field ({@code string_value}) with an integer field
 * ({@code int_value}).
 */
public class FuzzyLikeThisActionTests extends AbstractSharedClusterTest {

    /**
     * Checks how {@code fuzzy_like_this} and {@code fuzzy_like_this_field} behave when a
     * numeric field is involved, with and without {@code failOnUnsupportedField(false)}.
     *
     * See issue https://github.com/elasticsearch/elasticsearch/issues/3252
     */
    @Test
    public void testNumericField() throws Exception {
        prepareCreate("test").execute().actionGet();
        ensureGreen();
        client().prepareIndex("test", "type", "1")
                .setSource(jsonBuilder().startObject().field("string_value", "lucene index").field("int_value", 1).endObject())
                .execute().actionGet();
        client().prepareIndex("test", "type", "2")
                .setSource(jsonBuilder().startObject().field("string_value", "elasticsearch index").field("int_value", 42).endObject())
                .execute().actionGet();
        refresh();

        // flt query with no field -> OK
        SearchResponse searchResponse = client().prepareSearch().setQuery(fuzzyLikeThisQuery().likeText("index")).execute().actionGet();
        assertThat(searchResponse.getFailedShards(), equalTo(0));
        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));

        // flt query with string fields
        searchResponse = client().prepareSearch().setQuery(fuzzyLikeThisQuery("string_value").likeText("index")).execute().actionGet();
        assertThat(searchResponse.getFailedShards(), equalTo(0));
        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));

        // flt query with at least a numeric field -> fail
        try {
            client().prepareSearch().setQuery(fuzzyLikeThisQuery("string_value", "int_value").likeText("index")).execute().actionGet();
            fail();
        } catch (SearchPhaseExecutionException e) {
            // OK
        }

        // flt query with at least a numeric field but fail_on_unsupported_field set to false
        searchResponse = client().prepareSearch().setQuery(fuzzyLikeThisQuery("string_value", "int_value").likeText("index").failOnUnsupportedField(false)).execute().actionGet();
        assertThat(searchResponse.getFailedShards(), equalTo(0));
        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));

        // flt field query on a numeric field -> failure
        try {
            client().prepareSearch().setQuery(fuzzyLikeThisFieldQuery("int_value").likeText("42")).execute().actionGet();
            // Without this call the test would pass silently if the query unexpectedly succeeded.
            fail();
        } catch (SearchPhaseExecutionException e) {
            // OK
        }

        // flt field query on a numeric field but fail_on_unsupported_field set to false:
        // the search must succeed and is expected to match nothing
        searchResponse = client().prepareSearch().setQuery(fuzzyLikeThisFieldQuery("int_value").likeText("42").failOnUnsupportedField(false)).execute().actionGet();
        assertThat(searchResponse.getFailedShards(), equalTo(0));
        assertThat(searchResponse.getHits().getTotalHits(), equalTo(0L));
    }
}

View File

@ -19,17 +19,8 @@
package org.elasticsearch.test.integration.mlt; package org.elasticsearch.test.integration.mlt;
import static org.elasticsearch.client.Requests.indexAliasesRequest;
import static org.elasticsearch.client.Requests.indexRequest;
import static org.elasticsearch.client.Requests.moreLikeThisRequest;
import static org.elasticsearch.client.Requests.refreshRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.ImmutableSettings;
@ -37,6 +28,16 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.test.integration.AbstractSharedClusterTest; import org.elasticsearch.test.integration.AbstractSharedClusterTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisFieldQuery;
import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.testng.Assert.fail;
/** /**
* *
*/ */
@ -151,4 +152,67 @@ public class MoreLikeThisActionTests extends AbstractSharedClusterTest {
assertThat(searchResponse, notNullValue()); assertThat(searchResponse, notNullValue());
} }
/**
 * Checks how the more-like-this API, {@code more_like_this} and {@code more_like_this_field}
 * behave when a numeric field ({@code int_value}) is involved, with and without
 * {@code failOnUnsupportedField(false)}.
 *
 * See issue https://github.com/elasticsearch/elasticsearch/issues/3252
 */
@Test
public void testNumericField() throws Exception {
    prepareCreate("test").execute().actionGet();
    ensureGreen();
    client().prepareIndex("test", "type", "1")
            .setSource(jsonBuilder().startObject().field("string_value", "lucene index").field("int_value", 1).endObject())
            .execute().actionGet();
    client().prepareIndex("test", "type", "2")
            .setSource(jsonBuilder().startObject().field("string_value", "elasticsearch index").field("int_value", 42).endObject())
            .execute().actionGet();
    refresh();

    // Implicit list of fields -> ignore numeric fields
    SearchResponse searchResponse = client().prepareMoreLikeThis("test", "type", "1").setMinDocFreq(1).setMinTermFreq(1).execute().actionGet();
    assertThat(searchResponse.getFailedShards(), equalTo(0));
    assertThat(searchResponse.getHits().totalHits(), equalTo(1L));

    // Explicit list of fields including numeric fields -> fail
    try {
        client().prepareMoreLikeThis("test", "type", "1").setField("string_value", "int_value").execute().actionGet();
        fail();
    } catch (SearchPhaseExecutionException e) {
        // OK
    }

    // mlt query with no field -> OK
    searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery().likeText("index").minTermFreq(1).minDocFreq(1)).execute().actionGet();
    assertThat(searchResponse.getFailedShards(), equalTo(0));
    assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));

    // mlt query with string fields
    searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery("string_value").likeText("index").minTermFreq(1).minDocFreq(1)).execute().actionGet();
    assertThat(searchResponse.getFailedShards(), equalTo(0));
    assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));

    // mlt query with at least a numeric field -> fail
    try {
        client().prepareSearch().setQuery(moreLikeThisQuery("string_value", "int_value").likeText("index")).execute().actionGet();
        fail();
    } catch (SearchPhaseExecutionException e) {
        // OK
    }

    // mlt query with at least a numeric field but fail_on_unsupported_field set to false
    searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery("string_value", "int_value").likeText("index").minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false)).execute().actionGet();
    assertThat(searchResponse.getFailedShards(), equalTo(0));
    assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));

    // mlt field query on a numeric field -> failure
    try {
        client().prepareSearch().setQuery(moreLikeThisFieldQuery("int_value").likeText("42").minTermFreq(1).minDocFreq(1)).execute().actionGet();
        // Without this call the test would pass silently if the query unexpectedly succeeded.
        fail();
    } catch (SearchPhaseExecutionException e) {
        // OK
    }

    // mlt field query on a numeric field but fail_on_unsupported_field set to false:
    // the search must succeed and is expected to match nothing
    searchResponse = client().prepareSearch().setQuery(moreLikeThisFieldQuery("int_value").likeText("42").minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false)).execute().actionGet();
    assertThat(searchResponse.getFailedShards(), equalTo(0));
    assertThat(searchResponse.getHits().getTotalHits(), equalTo(0L));
}
} }

View File

@ -185,9 +185,9 @@ public class LatLonMappingGeoPointTests {
.bytes()); .bytes());
assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
assertThat(doc.rootDoc().getField("point.lat").numericValue(), nullValue()); assertThat(doc.rootDoc().getField("point.lat").fieldType().stored(), is(false));
assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
assertThat(doc.rootDoc().getField("point.lon").numericValue(), nullValue()); assertThat(doc.rootDoc().getField("point.lon").fieldType().stored(), is(false));
assertThat(doc.rootDoc().getField("point.geohash"), nullValue()); assertThat(doc.rootDoc().getField("point.geohash"), nullValue());
assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3"));
} }