Fix blended terms for non-strings take 2

It had some funky errors, like lenient:true not working and queries with
two integer fields blowing up if there was no analyzer defined on the
query. This throws a bunch more tests at it and rejiggers how non-strings
are handled so they don't wander off into scary QueryBuilder-land unless
they have a nice strong analyzer to protect them.
This commit is contained in:
Nik Everett 2016-01-11 10:06:46 -05:00
parent d8af49eb91
commit 50098bfb2c
6 changed files with 244 additions and 48 deletions

View File

@ -389,7 +389,12 @@ public abstract class MappedFieldType extends FieldType {
return false;
}
/** Creates a term associated with the field of this mapper for the given value */
/**
* Creates a term associated with the field of this mapper for the given
* value. Its important to use termQuery when building term queries because
* things like ParentFieldMapper override it to make more interesting
* queries.
*/
protected Term createTerm(Object value) {
return new Term(name(), indexedValueForSearch(value));
}

View File

@ -212,10 +212,6 @@ public class MatchQuery {
this.zeroTermsQuery = zeroTermsQuery;
}
protected boolean forceAnalyzeQueryString() {
return false;
}
protected Analyzer getAnalyzer(MappedFieldType fieldType) {
if (this.analyzer == null) {
if (fieldType != null) {
@ -240,17 +236,19 @@ public class MatchQuery {
field = fieldName;
}
if (fieldType != null && fieldType.useTermQueryWithQueryString() && !forceAnalyzeQueryString()) {
try {
return fieldType.termQuery(value, context);
} catch (RuntimeException e) {
if (lenient) {
return null;
}
throw e;
}
/*
* If the user forced an analyzer we really don't care if they are
* searching a type that wants term queries to be used with query string
* because the QueryBuilder will take care of it. If they haven't forced
* an analyzer then types like NumberFieldType that want terms with
* query string will blow up because their analyzer isn't capable of
* passing through QueryBuilder.
*/
boolean noForcedAnalyzer = this.analyzer == null;
if (fieldType != null && fieldType.useTermQueryWithQueryString() && noForcedAnalyzer) {
return termQuery(fieldType, value);
}
Analyzer analyzer = getAnalyzer(fieldType);
assert analyzer != null;
MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType);
@ -282,6 +280,26 @@ public class MatchQuery {
}
}
/**
* Creates a TermQuery-like-query for MappedFieldTypes that don't support
* QueryBuilder which is very string-ish. Just delegates to the
* MappedFieldType for MatchQuery but gets more complex for blended queries.
*/
protected Query termQuery(MappedFieldType fieldType, Object value) {
return termQuery(fieldType, value, lenient);
}
protected final Query termQuery(MappedFieldType fieldType, Object value, boolean lenient) {
try {
return fieldType.termQuery(value, context);
} catch (RuntimeException e) {
if (lenient) {
return null;
}
throw e;
}
}
protected Query zeroTermsQuery() {
return zeroTermsQuery == DEFAULT_ZERO_TERMS_QUERY ? Queries.newMatchNoDocsQuery() : Queries.newMatchAllQuery();
}
@ -289,20 +307,20 @@ public class MatchQuery {
private class MatchQueryBuilder extends QueryBuilder {
private final MappedFieldType mapper;
/**
* Creates a new QueryBuilder using the given analyzer.
*/
public MatchQueryBuilder(Analyzer analyzer, @Nullable MappedFieldType mapper) {
super(analyzer);
this.mapper = mapper;
}
}
@Override
protected Query newTermQuery(Term term) {
return blendTermQuery(term, mapper);
}
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery();
@ -352,11 +370,16 @@ public class MatchQuery {
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
if (fuzziness != null) {
if (fieldType != null) {
Query query = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions);
if (query instanceof FuzzyQuery) {
QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod);
try {
Query query = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions);
if (query instanceof FuzzyQuery) {
QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod);
}
return query;
} catch (RuntimeException e) {
return new TermQuery(term);
// See long comment below about why we're lenient here.
}
return query;
}
int edits = fuzziness.asDistance(term.text());
FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions);
@ -364,9 +387,25 @@ public class MatchQuery {
return query;
}
if (fieldType != null) {
Query termQuery = fieldType.queryStringTermQuery(term);
if (termQuery != null) {
return termQuery;
/*
* Its a bit weird to default to lenient here but its the backwards
* compatible. It makes some sense when you think about what we are
* doing here: at this point the user has forced an analyzer and
* passed some string to the match query. We cut it up using the
* analyzer and then tried to cram whatever we get into the field.
* lenient=true here means that we try the terms in the query and on
* the off chance that they are actually valid terms then we
* actually try them. lenient=false would mean that we blow up the
* query if they aren't valid terms. "valid" in this context means
* "parses properly to something of the type being queried." So "1"
* is a valid number, etc.
*
* We use the text form here because we we've received the term from
* an analyzer that cut some string into text.
*/
Query query = termQuery(fieldType, term.bytes(), true);
if (query != null) {
return query;
}
}
return new TermQuery(term);

View File

@ -27,7 +27,6 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.MappedFieldType;
@ -104,7 +103,7 @@ public class MultiMatchQuery extends MatchQuery {
this.tieBreaker = tieBreaker;
}
public List<Query> buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNames, Object value, String minimumShouldMatch) throws IOException{
public List<Query> buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNames, Object value, String minimumShouldMatch) throws IOException{
List<Query> queries = new ArrayList<>();
for (String fieldName : fieldNames.keySet()) {
Float boostValue = fieldNames.get(fieldName);
@ -146,8 +145,8 @@ public class MultiMatchQuery extends MatchQuery {
return MultiMatchQuery.super.blendTermQuery(term, fieldType);
}
public boolean forceAnalyzeQueryString() {
return false;
public Query termQuery(MappedFieldType fieldType, Object value) {
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
}
}
@ -196,8 +195,13 @@ public class MultiMatchQuery extends MatchQuery {
} else {
blendedFields = null;
}
final FieldAndFieldType fieldAndFieldType = group.get(0);
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
/*
* We have to pick some field to pass through the superclass so
* we just pick the first field. It shouldn't matter because
* fields are already grouped by their analyzers/types.
*/
String representativeField = group.get(0).field;
Query q = parseGroup(type.matchQueryType(), representativeField, 1f, value, minimumShouldMatch);
if (q != null) {
queries.add(q);
}
@ -206,11 +210,6 @@ public class MultiMatchQuery extends MatchQuery {
return queries.isEmpty() ? null : queries;
}
@Override
public boolean forceAnalyzeQueryString() {
return blendedFields != null;
}
@Override
public Query blendTerm(Term term, MappedFieldType fieldType) {
if (blendedFields == null) {
@ -231,6 +230,16 @@ public class MultiMatchQuery extends MatchQuery {
}
return BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker);
}
@Override
public Query termQuery(MappedFieldType fieldType, Object value) {
/*
* Use the string value of the term because we're reusing the
* portion of the query is usually after the analyzer has run on
* each term. We just skip that analyzer phase.
*/
return blendTerm(new Term(fieldType.name(), value.toString()), fieldType);
}
}
@Override
@ -241,6 +250,15 @@ public class MultiMatchQuery extends MatchQuery {
return queryBuilder.blendTerm(term, fieldType);
}
@Override
protected Query termQuery(MappedFieldType fieldType, Object value) {
if (queryBuilder == null) {
// Can be null when the MultiMatchQuery collapses into a MatchQuery
return super.termQuery(fieldType, value);
}
return queryBuilder.termQuery(fieldType, value);
}
private static final class FieldAndFieldType {
final String field;
final MappedFieldType fieldType;
@ -255,18 +273,17 @@ public class MultiMatchQuery extends MatchQuery {
public Term newTerm(String value) {
try {
final BytesRef bytesRef = fieldType.indexedValueForSearch(value);
return new Term(field, bytesRef);
} catch (Exception ex) {
/*
* Note that this ignore any overrides the fieldType might do
* for termQuery, meaning things like _parent won't work here.
*/
return new Term(fieldType.name(), fieldType.indexedValueForSearch(value));
} catch (RuntimeException ex) {
// we can't parse it just use the incoming value -- it will
// just have a DF of 0 at the end of the day and will be ignored
// Note that this is like lenient = true allways
}
return new Term(field, value);
}
}
@Override
protected boolean forceAnalyzeQueryString() {
return this.queryBuilder == null ? super.forceAnalyzeQueryString() : this.queryBuilder.forceAnalyzeQueryString();
}
}

View File

@ -24,14 +24,18 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MatchQuery.ZeroTermsQuery;
import org.hamcrest.Matcher;
import org.joda.time.format.ISODateTimeFormat;
import java.io.IOException;
import java.util.Locale;
@ -120,15 +124,15 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
switch (queryBuilder.type()) {
case BOOLEAN:
assertThat(query, either(instanceOf(BooleanQuery.class)).or(instanceOf(ExtendedCommonTermsQuery.class))
.or(instanceOf(TermQuery.class)).or(instanceOf(FuzzyQuery.class)));
.or(instanceOf(TermQuery.class)).or(instanceOf(FuzzyQuery.class)).or(instanceOf(NumericRangeQuery.class)));
break;
case PHRASE:
assertThat(query, either(instanceOf(BooleanQuery.class)).or(instanceOf(PhraseQuery.class))
.or(instanceOf(TermQuery.class)).or(instanceOf(FuzzyQuery.class)));
.or(instanceOf(TermQuery.class)).or(instanceOf(FuzzyQuery.class)).or(instanceOf(NumericRangeQuery.class)));
break;
case PHRASE_PREFIX:
assertThat(query, either(instanceOf(BooleanQuery.class)).or(instanceOf(MultiPhrasePrefixQuery.class))
.or(instanceOf(TermQuery.class)).or(instanceOf(FuzzyQuery.class)));
.or(instanceOf(TermQuery.class)).or(instanceOf(FuzzyQuery.class)).or(instanceOf(NumericRangeQuery.class)));
break;
}
@ -173,10 +177,45 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
// compare lowercased terms here
String originalTermLc = queryBuilder.value().toString().toLowerCase(Locale.ROOT);
String actualTermLc = fuzzyQuery.getTerm().text().toLowerCase(Locale.ROOT);
assertThat(actualTermLc, equalTo(originalTermLc));
Matcher<String> termLcMatcher = equalTo(originalTermLc);
if ("false".equals(originalTermLc) || "true".equals(originalTermLc)) {
// Booleans become t/f when querying a boolean field
termLcMatcher = either(termLcMatcher).or(equalTo(originalTermLc.substring(0, 1)));
}
assertThat(actualTermLc, termLcMatcher);
assertThat(queryBuilder.prefixLength(), equalTo(fuzzyQuery.getPrefixLength()));
assertThat(queryBuilder.fuzzyTranspositions(), equalTo(fuzzyQuery.getTranspositions()));
}
if (query instanceof NumericRangeQuery) {
// These are fuzzy numeric queries
assertTrue(queryBuilder.fuzziness() != null);
@SuppressWarnings("unchecked")
NumericRangeQuery<Number> numericRangeQuery = (NumericRangeQuery<Number>) query;
assertTrue(numericRangeQuery.includesMin());
assertTrue(numericRangeQuery.includesMax());
double value;
try {
value = Double.parseDouble(queryBuilder.value().toString());
} catch (NumberFormatException e) {
// Maybe its a date
value = ISODateTimeFormat.dateTimeParser().parseMillis(queryBuilder.value().toString());
}
double width;
if (queryBuilder.fuzziness().equals(Fuzziness.AUTO)) {
width = 1;
} else {
try {
width = queryBuilder.fuzziness().asDouble();
} catch (NumberFormatException e) {
// Maybe a time value?
width = queryBuilder.fuzziness().asTimeValue().getMillis();
}
}
assertEquals(value - width, numericRangeQuery.getMin().doubleValue(), width * .1);
assertEquals(value + width, numericRangeQuery.getMax().doubleValue(), width * .1);
}
}
public void testIllegalValues() {

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@ -132,7 +133,8 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
.or(instanceOf(BooleanQuery.class)).or(instanceOf(DisjunctionMaxQuery.class))
.or(instanceOf(FuzzyQuery.class)).or(instanceOf(MultiPhrasePrefixQuery.class))
.or(instanceOf(MatchAllDocsQuery.class)).or(instanceOf(ExtendedCommonTermsQuery.class))
.or(instanceOf(MatchNoDocsQuery.class)).or(instanceOf(PhraseQuery.class)));
.or(instanceOf(MatchNoDocsQuery.class)).or(instanceOf(PhraseQuery.class))
.or(instanceOf(NumericRangeQuery.class)));
}
public void testIllegaArguments() {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.query;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
@ -230,6 +231,12 @@ public class MultiMatchQueryIT extends ESIntegTestCase {
.setQuery(randomizeType(multiMatchQuery("15", "skill"))).get();
assertNoFailures(searchResponse);
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("15", "skill", "int-field")).analyzer("category")).get();
assertNoFailures(searchResponse);
assertFirstHit(searchResponse, hasId("theone"));
String[] fields = {"full_name", "first_name", "last_name", "last_name_phrase", "first_name_phrase", "category_phrase", "category"};
String[] query = {"marvel","hero", "captain", "america", "15", "17", "1", "5", "ultimate", "Man",
@ -459,18 +466,65 @@ public class MultiMatchQueryIT extends ESIntegTestCase {
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("captain america 15", "full_name", "first_name", "last_name", "category", "skill", "int-field")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category")
.operator(Operator.AND))).get();
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("captain america 15", "skill", "full_name", "first_name", "last_name", "category", "int-field")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category")
.operator(Operator.AND))).get();
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("captain america 15", "first_name", "last_name", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("15", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("25 15", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("25 15", "int-field", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("25 15", "first_name", "int-field", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("25 15", "int-field", "skill", "first_name")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("25 15", "int-field", "first_name", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.analyzer("category"))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("captain america marvel hero", "first_name", "last_name", "category")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
@ -529,6 +583,46 @@ public class MultiMatchQueryIT extends ESIntegTestCase {
assertFirstHit(searchResponse, hasId("ultimate2"));
assertSecondHit(searchResponse, hasId("ultimate1"));
assertThat(searchResponse.getHits().hits()[0].getScore(), greaterThan(searchResponse.getHits().hits()[1].getScore()));
// Test group based on numeric fields
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("15", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("15", "skill", "first_name")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
assertFirstHit(searchResponse, hasId("theone"));
// Two numeric fields together caused trouble at one point!
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("15", "int-field", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("15", "int-field", "first_name", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
assertFirstHit(searchResponse, hasId("theone"));
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("alpha 15", "first_name", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.lenient(true))).get();
assertFirstHit(searchResponse, hasId("ultimate1"));
/*
* Doesn't find theone because "alpha 15" isn't a number and we don't
* break on spaces.
*/
assertHitCount(searchResponse, 1);
// Lenient wasn't always properly lenient with two numeric fields
searchResponse = client().prepareSearch("test")
.setQuery(randomizeType(multiMatchQuery("alpha 15", "int-field", "first_name", "skill")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
.lenient(true))).get();
assertFirstHit(searchResponse, hasId("ultimate1"));
}
private static final void assertEquivalent(String query, SearchResponse left, SearchResponse right) {