Don't index ranges including NOW in percolator (#52748)

Currently, date ranges queries using NOW-based date math are rewritten to
MatchAllDocs queries when being preprocessed for the percolator. However,
since we added the verification step, this can result in incorrect matches when
percolator queries are run without scores. This commit changes things to instead
wrap date queries that use NOW with a new DateRangeIncludingNowQuery.
This is a simple wrapper query that returns its delegate at rewrite time, but it can
be detected by the percolator QueryAnalyzer and be dealt with accordingly.

This also allows us to remove a method on QueryRewriteContext, and push all
logic relating to NOW-based ranges into the DateFieldMapper.

Fixes #52617
This commit is contained in:
Alan Woodward 2020-02-25 12:00:23 +00:00
parent 1a14ae4e1b
commit 18663b0a85
10 changed files with 147 additions and 60 deletions

View File

@ -410,14 +410,7 @@ public class PercolatorFieldMapper extends FieldMapper {
Version indexVersion = context.mapperService().getIndexSettings().getIndexVersionCreated();
createQueryBuilderField(indexVersion, queryBuilderField, queryBuilder, context);
QueryBuilder queryBuilderForProcessing = queryBuilder.rewrite(new QueryShardContext(queryShardContext) {
@Override
public boolean convertNowRangeToMatchAll() {
return true;
}
});
Query query = toQuery(queryShardContext, isMapUnmappedFieldAsText(), queryBuilderForProcessing);
Query query = toQuery(queryShardContext, isMapUnmappedFieldAsText(), queryBuilder);
processQuery(query, context);
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.Version;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.index.query.DateRangeIncludingNowQuery;
import java.util.ArrayList;
import java.util.Arrays;
@ -155,6 +156,10 @@ final class QueryAnalyzer {
@Override
public QueryVisitor getSubVisitor(Occur occur, Query parent) {
if (parent instanceof DateRangeIncludingNowQuery) {
terms.add(Result.UNKNOWN);
return QueryVisitor.EMPTY_VISITOR;
}
this.verified = isVerified(parent);
if (occur == Occur.MUST || occur == Occur.FILTER) {
ResultBuilder builder = new ResultBuilder(version, true);

View File

@ -510,6 +510,17 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1));
qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);
queryBuilder = rangeQuery("date_field").from("now");
doc = mapperService.documentMapper().parse(new SourceToParse("test", "doc", "1", BytesReference.bytes(XContentFactory
.jsonBuilder()
.startObject()
.field(fieldName, queryBuilder)
.endObject()),
XContentType.JSON));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(),
equalTo(EXTRACTION_FAILED));
}
public void testStoringQueries() throws Exception {

View File

@ -49,6 +49,7 @@ import static org.elasticsearch.common.xcontent.XContentFactory.smileBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.yamlBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery;
import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.geoBoundingBoxQuery;
import static org.elasticsearch.index.query.QueryBuilders.geoDistanceQuery;
import static org.elasticsearch.index.query.QueryBuilders.geoPolygonQuery;
@ -941,4 +942,36 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet());
}
}
public void testWrappedWithConstantScore() throws Exception {
assertAcked(client().admin().indices().prepareCreate("test")
.addMapping("_doc", "d", "type=date", "q", "type=percolator")
);
client().prepareIndex("test", "_doc").setId("1")
.setSource(jsonBuilder().startObject().field("q",
boolQuery().must(rangeQuery("d").gt("now"))
).endObject())
.execute().actionGet();
client().prepareIndex("test", "_doc").setId("2")
.setSource(jsonBuilder().startObject().field("q",
boolQuery().must(rangeQuery("d").lt("now"))
).endObject())
.execute().actionGet();
client().admin().indices().prepareRefresh().get();
SearchResponse response = client().prepareSearch("test").setQuery(new PercolateQueryBuilder("q",
BytesReference.bytes(jsonBuilder().startObject().field("d", "2020-02-01T15:00:00.000+11:00").endObject()),
XContentType.JSON)).get();
assertEquals(1, response.getHits().getTotalHits().value);
response = client().prepareSearch("test").setQuery(constantScoreQuery(new PercolateQueryBuilder("q",
BytesReference.bytes(jsonBuilder().startObject().field("d", "2020-02-01T15:00:00.000+11:00").endObject()),
XContentType.JSON))).get();
assertEquals(1, response.getHits().getTotalHits().value);
}
}

View File

@ -49,6 +49,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
import org.elasticsearch.index.query.DateRangeIncludingNowQuery;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.DocValueFormat;
@ -386,11 +387,16 @@ public final class DateFieldMapper extends FieldMapper {
DateMathParser parser = forcedDateParser == null
? dateMathParser
: forcedDateParser;
boolean[] nowUsed = new boolean[1];
LongSupplier nowSupplier = () -> {
nowUsed[0] = true;
return context.nowInMillis();
};
long l, u;
if (lowerTerm == null) {
l = Long.MIN_VALUE;
} else {
l = parseToLong(lowerTerm, !includeLower, timeZone, parser, context::nowInMillis);
l = parseToLong(lowerTerm, !includeLower, timeZone, parser, nowSupplier);
if (includeLower == false) {
++l;
}
@ -398,7 +404,7 @@ public final class DateFieldMapper extends FieldMapper {
if (upperTerm == null) {
u = Long.MAX_VALUE;
} else {
u = parseToLong(upperTerm, includeUpper, timeZone, parser, context::nowInMillis);
u = parseToLong(upperTerm, includeUpper, timeZone, parser, nowSupplier);
if (includeUpper == false) {
--u;
}
@ -408,6 +414,9 @@ public final class DateFieldMapper extends FieldMapper {
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
query = new IndexOrDocValuesQuery(query, dvQuery);
}
if (nowUsed[0]) {
query = new DateRangeIncludingNowQuery(query);
}
return query;
}

View File

@ -0,0 +1,74 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import java.io.IOException;
import java.util.Objects;
/**
* A simple wrapper class that indicates that the wrapped query has made use of NOW
* when parsing its datemath. Useful for preprocessors such as the percolator that
* need to know when not to extract dates from the query.
*/
public class DateRangeIncludingNowQuery extends Query {
private final Query in;
public DateRangeIncludingNowQuery(Query in) {
this.in = in;
}
public Query getQuery() {
return in;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
return in;
}
@Override
public String toString(String field) {
return "DateRangeIncludingNowQuery(" + in + ")";
}
@Override
public void visit(QueryVisitor visitor) {
in.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this));
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DateRangeIncludingNowQuery that = (DateRangeIncludingNowQuery) o;
return Objects.equals(in, that.in);
}
@Override
public int hashCode() {
return Objects.hash(in);
}
}

View File

@ -124,15 +124,4 @@ public class QueryRewriteContext {
}
}
/**
* In pre-processing contexts that happen at index time 'now' date ranges should be replaced by a {@link MatchAllQueryBuilder}.
* Otherwise documents that should match at query time would never match and the document that have fallen outside the
* date range would continue to match.
*
* @return indicates whether range queries with date ranges using 'now' are rewritten to a {@link MatchAllQueryBuilder}.
*/
public boolean convertNowRangeToMatchAll() {
return false;
}
}

View File

@ -451,16 +451,6 @@ public class RangeQueryBuilder extends AbstractQueryBuilder<RangeQueryBuilder> i
@Override
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
// Percolator queries get rewritten and pre-processed at index time.
// If a range query has a date range using 'now' and 'now' gets resolved at index time then
// the pre-processing uses that to pre-process. This can then lead to mismatches at query time.
if (queryRewriteContext.convertNowRangeToMatchAll()) {
if ((from() != null && from().toString().contains("now")) ||
(to() != null && to().toString().contains("now"))) {
return new MatchAllQueryBuilder();
}
}
final MappedFieldType.Relation relation = getRelation(queryRewriteContext);
switch (relation) {
case DISJOINT:

View File

@ -48,6 +48,7 @@ import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.index.query.DateRangeIncludingNowQuery;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.joda.time.DateTimeZone;
@ -269,6 +270,15 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
assertEquals(expected,
ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(new MultiReader()));
instant1 = nowInMillis;
instant2 = instant1 + 100;
expected = new DateRangeIncludingNowQuery(new IndexOrDocValuesQuery(
LongPoint.newRangeQuery("field", instant1, instant2),
SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2)
));
assertEquals(expected,
ft.rangeQuery("now", instant2, true, true, null, null, null, context));
ft.setIndexOptions(IndexOptions.NONE);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> ft.rangeQuery(date1, date2, true, true, null, null, null, context));

View File

@ -346,6 +346,8 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase<RangeQueryBuil
"}";
QueryShardContext context = createShardContext();
Query parsedQuery = parseQuery(query).toQuery(context);
assertThat(parsedQuery, instanceOf(DateRangeIncludingNowQuery.class));
parsedQuery = ((DateRangeIncludingNowQuery)parsedQuery).getQuery();
assertThat(parsedQuery, instanceOf(IndexOrDocValuesQuery.class));
parsedQuery = ((IndexOrDocValuesQuery) parsedQuery).getIndexQuery();
assertThat(parsedQuery, instanceOf(PointRangeQuery.class));
@ -569,35 +571,6 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase<RangeQueryBuil
assertEquals(ShapeRelation.INTERSECTS, builder.relation());
}
public void testConvertNowRangeToMatchAll() throws IOException {
RangeQueryBuilder query = new RangeQueryBuilder(DATE_FIELD_NAME);
DateTime queryFromValue = new DateTime(2019, 1, 1, 0, 0, 0, ISOChronology.getInstanceUTC());
DateTime queryToValue = new DateTime(2020, 1, 1, 0, 0, 0, ISOChronology.getInstanceUTC());
if (randomBoolean()) {
query.from("now");
query.to(queryToValue);
} else if (randomBoolean()) {
query.from(queryFromValue);
query.to("now");
} else {
query.from("now");
query.to("now+1h");
}
QueryShardContext queryShardContext = createShardContext();
QueryBuilder rewritten = query.rewrite(queryShardContext);
assertThat(rewritten, instanceOf(RangeQueryBuilder.class));
queryShardContext = new QueryShardContext(queryShardContext) {
@Override
public boolean convertNowRangeToMatchAll() {
return true;
}
};
rewritten = query.rewrite(queryShardContext);
assertThat(rewritten, instanceOf(MatchAllQueryBuilder.class));
}
public void testTypeField() throws IOException {
RangeQueryBuilder builder = QueryBuilders.rangeQuery("_type")
.from("value1");