From 636e85e5b7d2c9ae90f9ffd61ffb90b89d4d7f81 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 7 Aug 2017 14:54:10 +0200 Subject: [PATCH] percolator: Hint what clauses are important in a conjunction query based on fields The percolator field mapper doesn't need to extract all terms and ranges from a bool query with must or filter clauses. In order to help the default extraction behavior, boost fields can be configured, so that fields that are known for not being selective enough can be ignored in favor of other fields, or clauses with specific fields can forcefully take precedence over other clauses. This can help selecting clauses for fields that don't match with a lot of percolator queries over other clauses and thus improving performance of the percolate query. For example a status like field is something that should be configured as an ignore field. Queries on this field tend to match with more documents and so if clauses for these fields get selected as best clause then that isn't very helpful for the candidate query that the percolate query generates to filter out percolator queries that are likely not going to match. 
--- .../mapping/types/percolator.asciidoc | 63 ++++++ .../percolator/PercolatorFieldMapper.java | 58 ++++- .../percolator/QueryAnalyzer.java | 187 +++++++++------- .../PercolatorFieldMapperTests.java | 86 +++++++- .../percolator/PercolatorQuerySearchIT.java | 30 +++ .../percolator/QueryAnalyzerTests.java | 199 ++++++++++++------ 6 files changed, 479 insertions(+), 144 deletions(-) diff --git a/docs/reference/mapping/types/percolator.asciidoc b/docs/reference/mapping/types/percolator.asciidoc index b9fe510c31c..47eb3efc89d 100644 --- a/docs/reference/mapping/types/percolator.asciidoc +++ b/docs/reference/mapping/types/percolator.asciidoc @@ -59,6 +59,69 @@ Fields referred in a percolator query may exist in any type of the index contain ===================================== +[float] +==== Influencing query extraction + +As part of indexing the percolator query, the percolator field mapper extracts the query terms and numeric ranges from the provided +query and indexes that alongside the query in separate internal fields. The `percolate` query uses these internal fields +to build a candidate query from the document being percolated in order to reduce the number of documents that need to be verified. + +In case a percolator query contains a `bool` query with must or filter clauses, then the percolator field mapper only has to +extract ranges or terms from a single clause. The percolator field mapper will prefer longer terms over shorter terms, because +longer terms in general match with fewer documents. For the same reason it prefers smaller ranges over bigger ranges. + +In general this behaviour works well. However sometimes there are fields in a bool query that shouldn't be taken into account +when selecting the best must or filter clause, or fields are known to be more selective than other fields. 
+ +For example a status like field may in fact not work well, because each status matches with many percolator queries and +then the candidate query the `percolate` query generates may not be able to filter out that many percolator queries. + +The percolator field mapping allows you to configure `boost_fields` in order to indicate to the percolator what fields are +important or not important when selecting the best must or filter clause in a `bool` query: + +[source,js] +-------------------------------------------------- +PUT another_index +{ + "mappings": { + "doc": { + "properties": { + "query": { + "type": "percolator", + "boost_fields": { + "status_field": 0, <1> + "price_field": 2 <2> + } + }, + "status_field": { + "type": "keyword" + }, + "price_field": { + "type": "long" + }, + "field": { + "type": "text" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +<1> A boost of zero hints to the percolator that if there are other clauses in a conjunction query then these should be + preferred over this one. + +<2> Any boost higher than 1 overrides the default behaviour when it comes to selecting the best clause. The clause + that has the field with the highest boost will be selected from a conjunction query for extraction. + +The steps the percolator field mapper takes when selecting a clause from a conjunction query: + +* If there are clauses that have boosted fields then the clause with highest boost field is selected. +* If there are range based clauses and term based clauses then term based clauses are picked over range based clauses. +* From all term based clauses the clause with longest term is picked. +* In the case when there are only range based clauses then the range clause with smallest range is picked over clauses with wider ranges. 
+ [float] ==== Reindexing your percolator queries diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java index d7efa90bb06..ee8c7ff44fb 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java @@ -83,6 +83,9 @@ import java.util.List; import java.util.Map; import java.util.function.Supplier; +import static org.elasticsearch.common.xcontent.support.XContentMapValues.isObject; +import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeFloatValue; +import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeStringValue; import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder; public class PercolatorFieldMapper extends FieldMapper { @@ -106,6 +109,7 @@ public class PercolatorFieldMapper extends FieldMapper { static class Builder extends FieldMapper.Builder { private final Supplier queryShardContext; + private final Map boostFields = new HashMap<>(); Builder(String fieldName, Supplier queryShardContext) { super(fieldName, FIELD_TYPE, FIELD_TYPE); @@ -130,7 +134,11 @@ public class PercolatorFieldMapper extends FieldMapper { setupFieldType(context); return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField, - extractionResultField, queryBuilderField, rangeFieldMapper); + extractionResultField, queryBuilderField, rangeFieldMapper, Collections.unmodifiableMap(boostFields)); + } + + void addBoostField(String field, float boost) { + this.boostFields.put(field, boost); } static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) { @@ -163,7 +171,24 @@ public class 
PercolatorFieldMapper extends FieldMapper { @Override public Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { - return new Builder(name, parserContext.queryShardContextSupplier()); + Builder builder = new Builder(name, parserContext.queryShardContextSupplier()); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String propName = entry.getKey(); + Object propNode = entry.getValue(); + if (propName.equals("boost_fields")) { + if (isObject(propNode)) { + for (Map.Entry innerEntry : ((Map) propNode).entrySet()) { + String fieldName = nodeStringValue(innerEntry.getKey(), null); + builder.addBoostField(fieldName, nodeFloatValue(innerEntry.getValue())); + } + } else { + throw new IllegalArgumentException("boost_fields [" + propNode + "] is not an object"); + } + iterator.remove(); + } + } + return builder; } } @@ -277,12 +302,14 @@ public class PercolatorFieldMapper extends FieldMapper { private BinaryFieldMapper queryBuilderField; private RangeFieldMapper rangeFieldMapper; + private Map boostFields; PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, Supplier queryShardContext, KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField, - BinaryFieldMapper queryBuilderField, RangeFieldMapper rangeFieldMapper) { + BinaryFieldMapper queryBuilderField, RangeFieldMapper rangeFieldMapper, + Map boostFields) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); this.queryShardContext = queryShardContext; this.queryTermsField = queryTermsField; @@ -290,6 +317,7 @@ public class PercolatorFieldMapper extends FieldMapper { this.queryBuilderField = queryBuilderField; this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings); this.rangeFieldMapper = rangeFieldMapper; + 
this.boostFields = boostFields; } @Override @@ -367,7 +395,7 @@ public class PercolatorFieldMapper extends FieldMapper { FieldType pft = (FieldType) this.fieldType(); QueryAnalyzer.Result result; try { - result = QueryAnalyzer.analyze(query); + result = QueryAnalyzer.analyze(query, boostFields); } catch (QueryAnalyzer.UnsupportedQueryException e) { doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType())); return; @@ -437,6 +465,28 @@ public class PercolatorFieldMapper extends FieldMapper { return CONTENT_TYPE; } + @Override + protected void doMerge(Mapper mergeWith, boolean updateAllTypes) { + super.doMerge(mergeWith, updateAllTypes); + PercolatorFieldMapper percolatorMergeWith = (PercolatorFieldMapper) mergeWith; + + // Updating the boost_fields can be allowed, because it doesn't break previously indexed percolator queries + // However the updated boost_fields to completely take effect, percolator queries prior to the mapping update need to be reindexed + boostFields = percolatorMergeWith.boostFields; + } + + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + if (boostFields.isEmpty() == false) { + builder.startObject("boost_fields"); + for (Map.Entry entry : boostFields.entrySet()) { + builder.field(entry.getKey(), entry.getValue()); + } + builder.endObject(); + } + } + /** * Fails if a percolator contains an unsupported query. 
The following queries are not supported: * 1) a has_child query diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index f9c19a73473..77f937e680c 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -57,16 +57,17 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.function.Function; +import java.util.function.BiFunction; +import java.util.function.Predicate; import static java.util.stream.Collectors.toSet; final class QueryAnalyzer { - private static final Map, Function> queryProcessors; + private static final Map, BiFunction, Result>> queryProcessors; static { - Map, Function> map = new HashMap<>(); + Map, BiFunction, Result>> map = new HashMap<>(); map.put(MatchNoDocsQuery.class, matchNoDocsQuery()); map.put(ConstantScoreQuery.class, constantScoreQuery()); map.put(BoostQuery.class, boostQuery()); @@ -117,48 +118,48 @@ final class QueryAnalyzer { * query analysis is stopped and an UnsupportedQueryException is thrown. So that the caller can mark * this query in such a way that the PercolatorQuery always verifies if this query with the MemoryIndex. */ - static Result analyze(Query query) { + static Result analyze(Query query, Map boosts) { Class queryClass = query.getClass(); if (queryClass.isAnonymousClass()) { // Sometimes queries have anonymous classes in that case we need the direct super class. 
// (for example blended term query) queryClass = queryClass.getSuperclass(); } - Function queryProcessor = queryProcessors.get(queryClass); + BiFunction, Result> queryProcessor = queryProcessors.get(queryClass); if (queryProcessor != null) { - return queryProcessor.apply(query); + return queryProcessor.apply(query, boosts); } else { throw new UnsupportedQueryException(query); } } - private static Function matchNoDocsQuery() { - return (query -> new Result(true, Collections.emptySet())); + private static BiFunction, Result> matchNoDocsQuery() { + return (query, boosts) -> new Result(true, Collections.emptySet()); } - private static Function constantScoreQuery() { - return query -> { + private static BiFunction, Result> constantScoreQuery() { + return (query, boosts)-> { Query wrappedQuery = ((ConstantScoreQuery) query).getQuery(); - return analyze(wrappedQuery); + return analyze(wrappedQuery, boosts); }; } - private static Function boostQuery() { - return query -> { + private static BiFunction, Result> boostQuery() { + return (query, boosts) -> { Query wrappedQuery = ((BoostQuery) query).getQuery(); - return analyze(wrappedQuery); + return analyze(wrappedQuery, boosts); }; } - private static Function termQuery() { - return (query -> { + private static BiFunction, Result> termQuery() { + return (query, boosts) -> { TermQuery termQuery = (TermQuery) query; return new Result(true, Collections.singleton(new QueryExtraction(termQuery.getTerm()))); - }); + }; } - private static Function termInSetQuery() { - return query -> { + private static BiFunction, Result> termInSetQuery() { + return (query, boosts) -> { TermInSetQuery termInSetQuery = (TermInSetQuery) query; Set terms = new HashSet<>(); PrefixCodedTerms.TermIterator iterator = termInSetQuery.getTermData().iterator(); @@ -169,29 +170,29 @@ final class QueryAnalyzer { }; } - private static Function synonymQuery() { - return query -> { + private static BiFunction, Result> synonymQuery() { + return (query, boosts) -> { 
Set terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); return new Result(true, terms); }; } - private static Function commonTermsQuery() { - return query -> { + private static BiFunction, Result> commonTermsQuery() { + return (query, boosts) -> { Set terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); return new Result(false, terms); }; } - private static Function blendedTermQuery() { - return query -> { + private static BiFunction, Result> blendedTermQuery() { + return (query, boosts) -> { Set terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); return new Result(true, terms); }; } - private static Function phraseQuery() { - return query -> { + private static BiFunction, Result> phraseQuery() { + return (query, boosts) -> { Term[] terms = ((PhraseQuery) query).getTerms(); if (terms.length == 0) { return new Result(true, Collections.emptySet()); @@ -209,8 +210,8 @@ final class QueryAnalyzer { }; } - private static Function multiPhraseQuery() { - return query -> { + private static BiFunction, Result> multiPhraseQuery() { + return (query, boosts) -> { Term[][] terms = ((MultiPhraseQuery) query).getTermArrays(); if (terms.length == 0) { return new Result(true, Collections.emptySet()); @@ -219,58 +220,58 @@ final class QueryAnalyzer { Set bestTermArr = null; for (Term[] termArr : terms) { Set queryExtractions = Arrays.stream(termArr).map(QueryExtraction::new).collect(toSet()); - bestTermArr = selectBestExtraction(bestTermArr, queryExtractions); + bestTermArr = selectBestExtraction(boosts, bestTermArr, queryExtractions); } return new Result(false, bestTermArr); }; } - private static Function spanTermQuery() { - return query -> { + private static BiFunction, Result> spanTermQuery() { + return (query, boosts) -> { Term term = ((SpanTermQuery) query).getTerm(); return new Result(true, Collections.singleton(new QueryExtraction(term))); }; 
} - private static Function spanNearQuery() { - return query -> { + private static BiFunction, Result> spanNearQuery() { + return (query, boosts) -> { Set bestClauses = null; SpanNearQuery spanNearQuery = (SpanNearQuery) query; for (SpanQuery clause : spanNearQuery.getClauses()) { - Result temp = analyze(clause); - bestClauses = selectBestExtraction(temp.extractions, bestClauses); + Result temp = analyze(clause, boosts); + bestClauses = selectBestExtraction(boosts, temp.extractions, bestClauses); } return new Result(false, bestClauses); }; } - private static Function spanOrQuery() { - return query -> { + private static BiFunction, Result> spanOrQuery() { + return (query, boosts) -> { Set terms = new HashSet<>(); SpanOrQuery spanOrQuery = (SpanOrQuery) query; for (SpanQuery clause : spanOrQuery.getClauses()) { - terms.addAll(analyze(clause).extractions); + terms.addAll(analyze(clause, boosts).extractions); } return new Result(false, terms); }; } - private static Function spanNotQuery() { - return query -> { - Result result = analyze(((SpanNotQuery) query).getInclude()); + private static BiFunction, Result> spanNotQuery() { + return (query, boosts) -> { + Result result = analyze(((SpanNotQuery) query).getInclude(), boosts); return new Result(false, result.extractions); }; } - private static Function spanFirstQuery() { - return query -> { - Result result = analyze(((SpanFirstQuery) query).getMatch()); + private static BiFunction, Result> spanFirstQuery() { + return (query, boosts) -> { + Result result = analyze(((SpanFirstQuery) query).getMatch(), boosts); return new Result(false, result.extractions); }; } - private static Function booleanQuery() { - return query -> { + private static BiFunction, Result> booleanQuery() { + return (query, boosts) -> { BooleanQuery bq = (BooleanQuery) query; List clauses = bq.clauses(); int minimumShouldMatch = bq.getMinimumNumberShouldMatch(); @@ -301,12 +302,12 @@ final class QueryAnalyzer { Result temp; try { - temp = 
analyze(clause.getQuery()); + temp = analyze(clause.getQuery(), boosts); } catch (UnsupportedQueryException e) { uqe = e; continue; } - bestClause = selectBestExtraction(temp.extractions, bestClause); + bestClause = selectBestExtraction(boosts, temp.extractions, bestClause); } if (bestClause != null) { return new Result(false, bestClause); @@ -326,22 +327,22 @@ final class QueryAnalyzer { disjunctions.add(clause.getQuery()); } } - return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0); + return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0, boosts); } }; } - private static Function disjunctionMaxQuery() { - return query -> { + private static BiFunction, Result> disjunctionMaxQuery() { + return (query, boosts) -> { List disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts(); - return handleDisjunction(disjuncts, 1, false); + return handleDisjunction(disjuncts, 1, false, boosts); }; } - private static Function functionScoreQuery() { - return query -> { + private static BiFunction, Result> functionScoreQuery() { + return (query, boosts) -> { FunctionScoreQuery functionScoreQuery = (FunctionScoreQuery) query; - Result result = analyze(functionScoreQuery.getSubQuery()); + Result result = analyze(functionScoreQuery.getSubQuery(), boosts); // If min_score is specified we can't guarantee upfront that this percolator query matches, // so in that case we set verified to false. // (if it matches with the percolator document matches with the extracted terms. 
@@ -351,8 +352,8 @@ final class QueryAnalyzer { }; } - private static Function pointRangeQuery() { - return query -> { + private static BiFunction, Result> pointRangeQuery() { + return (query, boosts) -> { PointRangeQuery pointRangeQuery = (PointRangeQuery) query; byte[] lowerPoint = pointRangeQuery.getLowerPoint(); byte[] upperPoint = pointRangeQuery.getUpperPoint(); @@ -371,18 +372,19 @@ final class QueryAnalyzer { return result; } - private static Function indexOrDocValuesQuery() { - return query -> { + private static BiFunction, Result> indexOrDocValuesQuery() { + return (query, boosts) -> { IndexOrDocValuesQuery indexOrDocValuesQuery = (IndexOrDocValuesQuery) query; - return analyze(indexOrDocValuesQuery.getIndexQuery()); + return analyze(indexOrDocValuesQuery.getIndexQuery(), boosts); }; } - private static Result handleDisjunction(List disjunctions, int minimumShouldMatch, boolean otherClauses) { + private static Result handleDisjunction(List disjunctions, int minimumShouldMatch, boolean otherClauses, + Map boosts) { boolean verified = minimumShouldMatch <= 1 && otherClauses == false; Set terms = new HashSet<>(); for (Query disjunct : disjunctions) { - Result subResult = analyze(disjunct); + Result subResult = analyze(disjunct, boosts); if (subResult.verified == false) { verified = false; } @@ -391,22 +393,53 @@ final class QueryAnalyzer { return new Result(verified, terms); } - static Set selectBestExtraction(Set extractions1, Set extractions2) { + static Set selectBestExtraction(Map boostFields, Set extractions1, + Set extractions2) { assert extractions1 != null || extractions2 != null; if (extractions1 == null) { return extractions2; } else if (extractions2 == null) { return extractions1; } else { + Set filtered1; + Set filtered2; + if (boostFields.isEmpty() == false) { + Predicate predicate = extraction -> { + String fieldName = extraction.term != null ? 
extraction.term.field() : extraction.range.fieldName; + float boost = boostFields.getOrDefault(fieldName, 1F); + return boost != 0F; + }; + filtered1 = extractions1.stream().filter(predicate).collect(toSet()); + if (filtered1.isEmpty()) { + return extractions2; + } + filtered2 = extractions2.stream().filter(predicate).collect(toSet()); + if (filtered2.isEmpty()) { + return extractions1; + } + + float extraction1LowestBoost = lowestBoost(filtered1, boostFields); + float extraction2LowestBoost = lowestBoost(filtered2, boostFields); + if (extraction1LowestBoost > extraction2LowestBoost) { + return extractions1; + } else if (extraction2LowestBoost > extraction1LowestBoost) { + return extractions2; + } + // Step out, because boosts are equal, so pick best extraction on either term or range size. + } else { + filtered1 = extractions1; + filtered2 = extractions2; + } + // Prefer term based extractions over range based extractions: boolean onlyRangeBasedExtractions = true; - for (QueryExtraction clause : extractions1) { + for (QueryExtraction clause : filtered1) { if (clause.term != null) { onlyRangeBasedExtractions = false; break; } } - for (QueryExtraction clause : extractions2) { + for (QueryExtraction clause : filtered2) { if (clause.term != null) { onlyRangeBasedExtractions = false; break; @@ -414,19 +447,19 @@ final class QueryAnalyzer { } if (onlyRangeBasedExtractions) { - BytesRef terms1SmallestRange = smallestRange(extractions1); - BytesRef terms2SmallestRange = smallestRange(extractions2); + BytesRef extraction1SmallestRange = smallestRange(filtered1); + BytesRef extraction2SmallestRange = smallestRange(filtered2); // Keep the clause with smallest range, this is likely to be the rarest. 
- if (terms1SmallestRange.compareTo(terms2SmallestRange) <= 0) { + if (extraction1SmallestRange.compareTo(extraction2SmallestRange) <= 0) { return extractions1; } else { return extractions2; } } else { - int terms1ShortestTerm = minTermLength(extractions1); - int terms2ShortestTerm = minTermLength(extractions2); + int extraction1ShortestTerm = minTermLength(filtered1); + int extraction2ShortestTerm = minTermLength(filtered2); // keep the clause with longest terms, this likely to be rarest. - if (terms1ShortestTerm >= terms2ShortestTerm) { + if (extraction1ShortestTerm >= extraction2ShortestTerm) { return extractions1; } else { return extractions2; @@ -435,6 +468,16 @@ final class QueryAnalyzer { } } + private static float lowestBoost(Set extractions, Map boostFields) { + float lowestBoost = Float.POSITIVE_INFINITY; + for (QueryExtraction extraction : extractions) { + String fieldName = extraction.term != null ? extraction.term.field() : extraction.range.fieldName; + float boost = boostFields.getOrDefault(fieldName, 1F); + lowestBoost = Math.min(lowestBoost, boost); + } + return lowestBoost; + } + private static int minTermLength(Set extractions) { // In case there are only range extractions, then we return Integer.MIN_VALUE, // so that selectBestExtraction(...) 
we are likely to prefer the extractions that contains at least a single extraction diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java index ca87ff6423d..0f6b60354e6 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java @@ -44,11 +44,11 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.hash.MurmurHash3; import org.elasticsearch.common.io.stream.InputStreamStreamInput; import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.hash.MurmurHash3; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -741,6 +741,90 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { return Arrays.copyOfRange(source, offset, offset + length); } + public void testBoostFields() throws Exception { + IndexService indexService = createIndex("another_index"); + MapperService mapperService = indexService.mapperService(); + + String mapper = XContentFactory.jsonBuilder().startObject().startObject("doc") + .startObject("_field_names").field("enabled", false).endObject() // makes testing easier + .startObject("properties") + .startObject("status").field("type", "keyword").endObject() + .startObject("update_field").field("type", "keyword").endObject() + .startObject("price").field("type", "long").endObject() 
+ .startObject("query1").field("type", "percolator") + .startObject("boost_fields").field("status", 0).field("updated_field", 2).endObject() + .endObject() + .startObject("query2").field("type", "percolator").endObject() + .endObject().endObject().endObject().string(); + mapperService.merge("doc", new CompressedXContent(mapper), MapperService.MergeReason.MAPPING_UPDATE, false); + DocumentMapper documentMapper = mapperService.documentMapper("doc"); + + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.add(new TermQuery(new Term("status", "updated")), Occur.FILTER); + bq.add(LongPoint.newRangeQuery("price", 5, 10), Occur.FILTER); + + // Boost fields will ignore status_field: + PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper("query1"); + ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, + mapperService.documentMapperParser(), documentMapper, null, null); + fieldMapper.processQuery(bq.build(), parseContext); + ParseContext.Document document = parseContext.doc(); + PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_PARTIAL)); + assertThat(document.getFields(fieldType.queryTermsField.name()).length, equalTo(0)); + List fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.rangeField.name()))); + assertThat(fields.size(), equalTo(1)); + assertThat(LongPoint.decodeDimension(subByteArray(fields.get(0).binaryValue().bytes, 8, 8), 0), equalTo(5L)); + assertThat(LongPoint.decodeDimension(subByteArray(fields.get(0).binaryValue().bytes, 24, 8), 0), equalTo(10L)); + + // No boost fields, so default extraction logic: + fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper("query2"); + parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, 
mapperService.documentMapperParser(), + documentMapper, null, null); + fieldMapper.processQuery(bq.build(), parseContext); + document = parseContext.doc(); + fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_PARTIAL)); + assertThat(document.getFields(fieldType.rangeField.name()).length, equalTo(0)); + fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.queryTermsField.name()))); + assertThat(fields.size(), equalTo(1)); + assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("status\0updated")); + + // Second clause is extracted, because it is boosted by 2: + bq = new BooleanQuery.Builder(); + bq.add(new TermQuery(new Term("status", "updated")), Occur.FILTER); + bq.add(new TermQuery(new Term("updated_field", "done")), Occur.FILTER); + + fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper("query1"); + parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, mapperService.documentMapperParser(), + documentMapper, null, null); + fieldMapper.processQuery(bq.build(), parseContext); + document = parseContext.doc(); + fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_PARTIAL)); + assertThat(document.getFields(fieldType.rangeField.name()).length, equalTo(0)); + fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.queryTermsField.name()))); + assertThat(fields.size(), equalTo(1)); + assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("updated_field\0done")); + + // First clause is extracted, because default logic: + bq = new BooleanQuery.Builder(); + bq.add(new TermQuery(new Term("status", "updated")), Occur.FILTER); + bq.add(new TermQuery(new Term("updated_field", "done")), Occur.FILTER); + + fieldMapper = (PercolatorFieldMapper) 
documentMapper.mappers().getMapper("query2"); + parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, mapperService.documentMapperParser(), + documentMapper, null, null); + fieldMapper.processQuery(bq.build(), parseContext); + document = parseContext.doc(); + fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_PARTIAL)); + assertThat(document.getFields(fieldType.rangeField.name()).length, equalTo(0)); + fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.queryTermsField.name()))); + assertThat(fields.size(), equalTo(1)); + assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("status\0updated")); + } + // Just so that we store scripts in percolator queries, but not really execute these scripts. public static class FoolMeScriptPlugin extends MockScriptPlugin { diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java index 55835dec92b..17833864a42 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java @@ -650,4 +650,34 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase { assertThat(item.getFailureMessage(), containsString("[test/type/6] couldn't be found")); } + public void testBoostFields() throws Exception { + XContentBuilder mappingSource = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("status").field("type", "keyword").endObject() + .startObject("price").field("type", "long").endObject() + .startObject("query").field("type", "percolator") + .startObject("boost_fields").field("status", 0.0F).endObject() + .endObject() + 
.endObject().endObject().endObject(); + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type", mappingSource)); + + client().prepareIndex("test", "type", "q1") + .setSource(jsonBuilder().startObject().field("query", boolQuery() + .must(matchQuery("status", "sold")) + .must(matchQuery("price", 100)) + ).endObject()) + .get(); + refresh(); + + SearchResponse response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", + XContentFactory.jsonBuilder().startObject() + .field("status", "sold") + .field("price", 100) + .endObject().bytes(), XContentType.JSON)) + .get(); + assertHitCount(response, 1); + assertThat(response.getHits().getAt(0).getId(), equalTo("q1")); + } + } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 9bfbce2ed07..e1e28b2bbee 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -59,9 +59,12 @@ import org.elasticsearch.test.ESTestCase; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -77,7 +80,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_termQuery() { TermQuery termQuery = new TermQuery(new Term("_field", "_term")); - Result result = analyze(termQuery); + Result result = analyze(termQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); @@ -87,7 +90,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void 
testExtractQueryMetadata_termsQuery() { TermInSetQuery termsQuery = new TermInSetQuery("_field", new BytesRef("_term1"), new BytesRef("_term2")); - Result result = analyze(termsQuery); + Result result = analyze(termsQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -100,7 +103,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_phraseQuery() { PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2"); - Result result = analyze(phraseQuery); + Result result = analyze(phraseQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); @@ -115,7 +118,7 @@ public class QueryAnalyzerTests extends ESTestCase { .add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")}) .add(new Term[] {new Term("_field", "_very_long_term")}) .build(); - Result result = analyze(multiPhraseQuery); + Result result = analyze(multiPhraseQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); @@ -138,7 +141,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); - Result result = analyze(booleanQuery); + Result result = analyze(booleanQuery, Collections.emptyMap()); assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false)); List terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -166,7 +169,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); - Result result = 
analyze(booleanQuery); + Result result = analyze(booleanQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -189,7 +192,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(phraseQuery, BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); - Result result = analyze(booleanQuery); + Result result = analyze(booleanQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); @@ -203,58 +206,58 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(termQuery1, BooleanClause.Occur.SHOULD); TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2")); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - Result result = analyze(builder.build()); + Result result = analyze(builder.build(), Collections.emptyMap()); assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, BooleanClause.Occur.SHOULD); PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2"); builder.add(phraseQuery1, BooleanClause.Occur.SHOULD); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(phraseQuery1, BooleanClause.Occur.SHOULD); PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4"); builder.add(phraseQuery2, BooleanClause.Occur.SHOULD); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, 
BooleanClause.Occur.MUST_NOT); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32)); builder.add(termQuery1, BooleanClause.Occur.SHOULD); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("Minimum match is >= 1, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("Single required clause, so candidate matches are verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, randomBoolean() ? 
BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); builder.add(termQuery2, BooleanClause.Occur.MUST_NOT); - result = analyze(builder.build()); + result = analyze(builder.build(), Collections.emptyMap()); assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false)); } public void testExtractQueryMetadata_constantScoreQuery() { TermQuery termQuery1 = new TermQuery(new Term("_field", "_term")); ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1); - Result result = analyze(constantScoreQuery); + Result result = analyze(constantScoreQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); @@ -265,7 +268,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_boostQuery() { TermQuery termQuery1 = new TermQuery(new Term("_field", "_term")); BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f); - Result result = analyze(constantScoreQuery); + Result result = analyze(constantScoreQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); @@ -277,7 +280,7 @@ public class QueryAnalyzerTests extends ESTestCase { CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100); commonTermsQuery.add(new Term("_field", "_term1")); commonTermsQuery.add(new Term("_field", "_term2")); - Result result = analyze(commonTermsQuery); + Result result = analyze(commonTermsQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -291,7 +294,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_blendedTermQuery() { Term[] termsArr = new Term[]{new 
Term("_field", "_term1"), new Term("_field", "_term2")}; BlendedTermQuery commonTermsQuery = BlendedTermQuery.dismaxBlendedQuery(termsArr, 1.0f); - Result result = analyze(commonTermsQuery); + Result result = analyze(commonTermsQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -315,7 +318,7 @@ public class QueryAnalyzerTests extends ESTestCase { // 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); - Result result = analyze(spanTermQuery1); + Result result = analyze(spanTermQuery1, Collections.emptyMap()); assertThat(result.verified, is(true)); assertTermsEqual(result.extractions, spanTermQuery1.getTerm()); } @@ -326,7 +329,7 @@ public class QueryAnalyzerTests extends ESTestCase { SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true) .addClause(spanTermQuery1).addClause(spanTermQuery2).build(); - Result result = analyze(spanNearQuery); + Result result = analyze(spanNearQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, spanTermQuery2.getTerm()); } @@ -335,7 +338,7 @@ public class QueryAnalyzerTests extends ESTestCase { SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term")); SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2); - Result result = analyze(spanOrQuery); + Result result = analyze(spanOrQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, spanTermQuery1.getTerm(), spanTermQuery2.getTerm()); } @@ -343,7 +346,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_spanFirstQuery() { SpanTermQuery spanTermQuery1 = new 
SpanTermQuery(new Term("_field", "_short_term")); SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20); - Result result = analyze(spanFirstQuery); + Result result = analyze(spanFirstQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, spanTermQuery1.getTerm()); } @@ -352,27 +355,27 @@ public class QueryAnalyzerTests extends ESTestCase { SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term")); SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2); - Result result = analyze(spanNotQuery); + Result result = analyze(spanNotQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, spanTermQuery1.getTerm()); } public void testExtractQueryMetadata_matchNoDocsQuery() { - Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all")); + Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"), Collections.emptyMap()); assertThat(result.verified, is(true)); assertEquals(0, result.extractions.size()); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST); - result = analyze(bq.build()); + result = analyze(bq.build(), Collections.emptyMap()); assertThat(result.verified, is(false)); assertEquals(0, result.extractions.size()); bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD); - result = analyze(bq.build()); + result = analyze(bq.build(), Collections.emptyMap()); assertThat(result.verified, is(true)); assertTermsEqual(result.extractions, new 
Term("field", "value")); @@ -380,18 +383,18 @@ public class QueryAnalyzerTests extends ESTestCase { Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")), 1f ); - result = analyze(disjunctionMaxQuery); + result = analyze(disjunctionMaxQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); assertTermsEqual(result.extractions, new Term("field", "value")); } public void testExtractQueryMetadata_matchAllDocsQuery() { - expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery())); + expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery(), Collections.emptyMap())); BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); - Result result = analyze(builder.build()); + Result result = analyze(builder.build(), Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, new Term("field", "value")); @@ -400,39 +403,40 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); BooleanQuery bq1 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq1)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq1, Collections.emptyMap())); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); BooleanQuery bq2 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq2)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Collections.emptyMap())); builder = new BooleanQuery.Builder(); 
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq3 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq3)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq3, Collections.emptyMap())); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq4 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq4)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq4, Collections.emptyMap())); builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq5 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq5)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq5, Collections.emptyMap())); } public void testExtractQueryMetadata_unsupportedQuery() { TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false); - UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(termRangeQuery)); + UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, + () -> analyze(termRangeQuery, Collections.emptyMap())); assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery)); TermQuery termQuery1 = new TermQuery(new Term("_field", "_term")); @@ -441,7 +445,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(termRangeQuery, BooleanClause.Occur.SHOULD); BooleanQuery bq = builder.build(); - e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq)); + e = 
expectThrows(UnsupportedQueryException.class, () -> analyze(bq, Collections.emptyMap())); assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery)); } @@ -454,7 +458,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(unsupportedQuery, BooleanClause.Occur.MUST); BooleanQuery bq1 = builder.build(); - Result result = analyze(bq1); + Result result = analyze(bq1, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, termQuery1.getTerm()); @@ -464,7 +468,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(termQuery2, BooleanClause.Occur.MUST); builder.add(unsupportedQuery, BooleanClause.Occur.MUST); bq1 = builder.build(); - result = analyze(bq1); + result = analyze(bq1, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, termQuery2.getTerm()); @@ -472,7 +476,7 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(unsupportedQuery, BooleanClause.Occur.MUST); builder.add(unsupportedQuery, BooleanClause.Occur.MUST); BooleanQuery bq2 = builder.build(); - UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq2)); + UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Collections.emptyMap())); assertThat(e.getUnsupportedQuery(), sameInstance(unsupportedQuery)); } @@ -485,7 +489,7 @@ public class QueryAnalyzerTests extends ESTestCase { Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f ); - Result result = analyze(disjunctionMaxQuery); + Result result = analyze(disjunctionMaxQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -503,7 +507,7 @@ public class QueryAnalyzerTests extends ESTestCase { Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f ); - result = 
analyze(disjunctionMaxQuery); + result = analyze(disjunctionMaxQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); terms = new ArrayList<>(result.extractions); terms.sort(Comparator.comparing(qt -> qt.term)); @@ -520,12 +524,12 @@ public class QueryAnalyzerTests extends ESTestCase { public void testSynonymQuery() { SynonymQuery query = new SynonymQuery(); - Result result = analyze(query); + Result result = analyze(query, Collections.emptyMap()); assertThat(result.verified, is(true)); assertThat(result.extractions.isEmpty(), is(true)); query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2")); - result = analyze(query); + result = analyze(query, Collections.emptyMap()); assertThat(result.verified, is(true)); assertTermsEqual(result.extractions, new Term("_field", "_value1"), new Term("_field", "_value2")); } @@ -533,13 +537,13 @@ public class QueryAnalyzerTests extends ESTestCase { public void testFunctionScoreQuery() { TermQuery termQuery = new TermQuery(new Term("_field", "_value")); FunctionScoreQuery functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(0, 0, null)); - Result result = analyze(functionScoreQuery); + Result result = analyze(functionScoreQuery, Collections.emptyMap()); assertThat(result.verified, is(true)); assertTermsEqual(result.extractions, new Term("_field", "_value")); functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(0, 0, null), CombineFunction.MULTIPLY, 1f, 10f); - result = analyze(functionScoreQuery); + result = analyze(functionScoreQuery, Collections.emptyMap()); assertThat(result.verified, is(false)); assertTermsEqual(result.extractions, new Term("_field", "_value")); } @@ -547,45 +551,106 @@ public class QueryAnalyzerTests extends ESTestCase { public void testSelectBestExtraction() { Set queryTerms1 = terms(new int[0], "12", "1234", "12345"); Set queryTerms2 = terms(new int[0], "123", "1234", "12345"); - Set result = 
selectBestExtraction(queryTerms1, queryTerms2); + Set result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms2, result); queryTerms1 = terms(new int[]{1, 2, 3}); queryTerms2 = terms(new int[]{2, 3, 4}); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms1, result); queryTerms1 = terms(new int[]{4, 5, 6}); queryTerms2 = terms(new int[]{1, 2, 3}); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms2, result); queryTerms1 = terms(new int[]{1, 2, 3}, "123", "456"); queryTerms2 = terms(new int[]{2, 3, 4}, "123", "456"); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms1, result); queryTerms1 = terms(new int[]{10}); queryTerms2 = terms(new int[]{1}); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms2, result); queryTerms1 = terms(new int[]{10}, "123"); queryTerms2 = terms(new int[]{1}); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms1, result); queryTerms1 = terms(new int[]{10}, "1", "123"); queryTerms2 = terms(new int[]{1}, "1", "2"); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); assertSame(queryTerms1, result); queryTerms1 = terms(new int[]{1, 2, 3}, "123", "456"); queryTerms2 = terms(new int[]{2, 3, 4}, "1", "456"); - result = selectBestExtraction(queryTerms1, queryTerms2); + result = selectBestExtraction(Collections.emptyMap(), queryTerms1, queryTerms2); 
assertSame("Ignoring ranges, so then prefer queryTerms1, because it has the longest shortest term", queryTerms1, result); } + public void testSelectBestExtraction_boostFields() { + Set queryTerms1 = new HashSet<>(Arrays.asList( + new QueryExtraction(new Term("status_field", "sold")), + new QueryExtraction(new Term("category", "accessory")) + )); + Set queryTerms2 = new HashSet<>(Arrays.asList( + new QueryExtraction(new Term("status_field", "instock")), + new QueryExtraction(new Term("category", "hardware")) + )); + Set result = selectBestExtraction(Collections.singletonMap("status_field", 0F), queryTerms1, queryTerms2); + assertSame(queryTerms1, result); + + byte[] interval = new byte[Long.BYTES]; + LongPoint.encodeDimension(4, interval, 0); + queryTerms1 = new HashSet<>(Arrays.asList( + new QueryExtraction(new Term("status_field", "sold")), + new QueryExtraction(new QueryAnalyzer.Range("price", null, null, interval)) + )); + interval = new byte[Long.BYTES]; + LongPoint.encodeDimension(8, interval, 0); + queryTerms2 = new HashSet<>(Arrays.asList( + new QueryExtraction(new Term("status_field", "instock")), + new QueryExtraction(new QueryAnalyzer.Range("price", null, null, interval)) + )); + result = selectBestExtraction(Collections.singletonMap("status_field", 0F), queryTerms1, queryTerms2); + assertSame(queryTerms1, result); + + Map boostFields = new HashMap<>(); + boostFields.put("field1", 2F); + boostFields.put("field2", 0.5F); + boostFields.put("field4", 3F); + boostFields.put("field5", 0.6F); + queryTerms1 = new HashSet<>(Arrays.asList( + new QueryExtraction(new Term("field1", "sold")), + new QueryExtraction(new Term("field2", "accessory")), + new QueryExtraction(new QueryAnalyzer.Range("field3", null, null, new byte[0])) + )); + queryTerms2 = new HashSet<>(Arrays.asList( + new QueryExtraction(new Term("field3", "sold")), + new QueryExtraction(new Term("field4", "accessory")), + new QueryExtraction(new QueryAnalyzer.Range("field5", null, null, new byte[0])) + 
)); + result = selectBestExtraction(boostFields, queryTerms1, queryTerms2); + assertSame(queryTerms2, result); + + boostFields.put("field2", 6F); + result = selectBestExtraction(boostFields, queryTerms1, queryTerms2); + assertSame(queryTerms1, result); + + boostFields.put("field2", 0F); + boostFields.put("field3", 0F); + boostFields.put("field5", 0F); + result = selectBestExtraction(boostFields, queryTerms1, queryTerms2); + assertSame(queryTerms2, result); + + boostFields = new HashMap<>(); + boostFields.put("field2", 2F); + result = selectBestExtraction(boostFields, queryTerms1, queryTerms2); + assertSame(queryTerms1, result); + } + public void testSelectBestExtraction_random() { Set terms1 = new HashSet<>(); int shortestTerms1Length = Integer.MAX_VALUE; @@ -607,7 +672,7 @@ public class QueryAnalyzerTests extends ESTestCase { sumTermLength -= length; } - Set result = selectBestExtraction(terms1, terms2); + Set result = selectBestExtraction(Collections.emptyMap(), terms1, terms2); Set expected = shortestTerms1Length >= shortestTerms2Length ? 
terms1 : terms2; assertThat(result, sameInstance(expected)); } @@ -615,7 +680,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testPointRangeQuery() { // int ranges get converted to long ranges: Query query = IntPoint.newRangeQuery("_field", 10, 20); - Result result = analyze(query); + Result result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); List ranges = new ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -625,7 +690,7 @@ public class QueryAnalyzerTests extends ESTestCase { assertDimension(ranges.get(0).range.upperPoint, bytes -> IntPoint.encodeDimension(20, bytes, 0)); query = LongPoint.newRangeQuery("_field", 10L, 21L); - result = analyze(query); + result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); ranges = new ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -636,7 +701,7 @@ public class QueryAnalyzerTests extends ESTestCase { // Half float ranges get converted to double ranges: query = HalfFloatPoint.newRangeQuery("_field", 10F, 20F); - result = analyze(query); + result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); ranges = new ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -647,7 +712,7 @@ public class QueryAnalyzerTests extends ESTestCase { // Float ranges get converted to double ranges: query = FloatPoint.newRangeQuery("_field", 10F, 20F); - result = analyze(query); + result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); ranges = new ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -657,7 +722,7 @@ public class QueryAnalyzerTests extends ESTestCase { assertDimension(ranges.get(0).range.upperPoint, bytes -> FloatPoint.encodeDimension(20F, bytes, 0)); query = DoublePoint.newRangeQuery("_field", 10D, 20D); - result = analyze(query); + result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); ranges = new 
ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -668,7 +733,7 @@ public class QueryAnalyzerTests extends ESTestCase { query = InetAddressPoint.newRangeQuery("_field", InetAddresses.forString("192.168.1.0"), InetAddresses.forString("192.168.1.255")); - result = analyze(query); + result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); ranges = new ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -681,7 +746,7 @@ public class QueryAnalyzerTests extends ESTestCase { public void testIndexOrDocValuesQuery() { Query query = new IndexOrDocValuesQuery(IntPoint.newRangeQuery("_field", 10, 20), SortedNumericDocValuesField.newSlowRangeQuery("_field", 10, 20)); - Result result = analyze(query); + Result result = analyze(query, Collections.emptyMap()); assertFalse(result.verified); List ranges = new ArrayList<>(result.extractions); assertThat(ranges.size(), equalTo(1)); @@ -695,7 +760,7 @@ public class QueryAnalyzerTests extends ESTestCase { BooleanQuery.Builder boolQuery = new BooleanQuery.Builder(); boolQuery.add(LongPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER); boolQuery.add(LongPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER); - Result result = analyze(boolQuery.build()); + Result result = analyze(boolQuery.build(), Collections.emptyMap()); assertFalse(result.verified); assertEquals(1, result.extractions.size()); assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName); @@ -703,7 +768,7 @@ public class QueryAnalyzerTests extends ESTestCase { boolQuery = new BooleanQuery.Builder(); boolQuery.add(LongPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER); boolQuery.add(IntPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER); - result = analyze(boolQuery.build()); + result = analyze(boolQuery.build(), Collections.emptyMap()); assertFalse(result.verified); assertEquals(1, result.extractions.size()); 
assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName); @@ -711,7 +776,7 @@ public class QueryAnalyzerTests extends ESTestCase { boolQuery = new BooleanQuery.Builder(); boolQuery.add(DoublePoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER); boolQuery.add(DoublePoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER); - result = analyze(boolQuery.build()); + result = analyze(boolQuery.build(), Collections.emptyMap()); assertFalse(result.verified); assertEquals(1, result.extractions.size()); assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName); @@ -719,7 +784,7 @@ public class QueryAnalyzerTests extends ESTestCase { boolQuery = new BooleanQuery.Builder(); boolQuery.add(DoublePoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER); boolQuery.add(FloatPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER); - result = analyze(boolQuery.build()); + result = analyze(boolQuery.build(), Collections.emptyMap()); assertFalse(result.verified); assertEquals(1, result.extractions.size()); assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName); @@ -727,7 +792,7 @@ public class QueryAnalyzerTests extends ESTestCase { boolQuery = new BooleanQuery.Builder(); boolQuery.add(HalfFloatPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER); boolQuery.add(HalfFloatPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER); - result = analyze(boolQuery.build()); + result = analyze(boolQuery.build(), Collections.emptyMap()); assertFalse(result.verified); assertEquals(1, result.extractions.size()); assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);