Add support for selecting percolator query candidate matches containing range queries.

Extracts ranges from range queries on byte, short, integer, long, half_float, scaled_float, float, double, date and ip fields.
Ranges on byte, short, integer and date fields are normalized to Lucene's LongRange;
ranges on half_float and float fields are normalized to Lucene's DoubleRange.

When extracting range queries, the QueryAnalyzer computes the width of the range. This width is used to determine
which range should be preferred in a conjunction query. The QueryAnalyzer prefers the smaller range, because smaller
ranges tend to match fewer documents.
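For illustration, a minimal sketch of that width computation for an int range (the class name and the literal bounds are made up; the front-padding and subtraction mirror prepad(...) and pointRangeQuery() in QueryAnalyzer below):

import org.apache.lucene.document.IntPoint;
import org.apache.lucene.util.NumericUtils;

class RangeWidthSketch {
    public static void main(String[] args) {
        // pad both encoded points to 16 bytes (BinaryRange.BYTES) and subtract them byte-wise
        byte[] lower = new byte[16], upper = new byte[16], width = new byte[16];
        IntPoint.encodeDimension(3, lower, 16 - Integer.BYTES);
        IntPoint.encodeDimension(9, upper, 16 - Integer.BYTES);
        NumericUtils.subtract(16, 0, upper, lower, width);
        // width now holds 6 in its low-order bytes; smaller widths compare as smaller
        // byte strings, so the narrower (rarer) range wins in a conjunction
    }
}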

Closes #21040
Martijn van Groningen 2017-07-11 15:34:59 +02:00
parent b72c71083c
commit edad7b4737
8 changed files with 970 additions and 172 deletions

BinaryRange.java

@ -0,0 +1,71 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
/**
* A range field for binary encoded ranges
*/
public final class BinaryRange extends Field {
/** The number of bytes per dimension, use {@link InetAddressPoint#BYTES} as max, because that is the maximum we need to support */
public static final int BYTES = InetAddressPoint.BYTES;
private static final FieldType TYPE;
static {
TYPE = new FieldType();
TYPE.setDimensions(2, BYTES);
TYPE.freeze();
}
/**
* Create a new BinaryRange from a provided encoded binary range
* @param name field name. must not be null.
* @param encodedRange Encoded range
*/
public BinaryRange(String name, byte[] encodedRange) {
super(name, TYPE);
if (encodedRange.length != BYTES * 2) {
throw new IllegalArgumentException("Unexpected encoded range length [" + encodedRange.length + "]");
}
fieldsData = new BytesRef(encodedRange);
}
/**
* Create a query for matching indexed ranges that {@code INTERSECT} the defined range.
* @param field field name. must not be null.
* @param encodedRange Encoded range
* @return query for matching intersecting encoded ranges (overlap, within, crosses, or contains)
* @throws IllegalArgumentException if {@code field} is null or {@code encodedRange} is invalid
*/
public static Query newIntersectsQuery(String field, byte[] encodedRange) {
return newRelationQuery(field, encodedRange, RangeFieldQuery.QueryType.INTERSECTS);
}
static Query newRelationQuery(String field, byte[] encodedRange, RangeFieldQuery.QueryType relation) {
return new RangeFieldQuery(field, encodedRange, 1, relation) {
@Override
protected String toString(byte[] ranges, int dimension) {
return "[" + new BytesRef(ranges, 0, BYTES) + " TO " + new BytesRef(ranges, BYTES, BYTES) + "]";
}
};
}
}
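A minimal usage sketch (the class, field name and helper signature are illustrative; it assumes encodedMin and encodedMax are each exactly BinaryRange.BYTES long, matching the layout the constructor validates):

import org.apache.lucene.document.BinaryRange;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;

class BinaryRangeSketch {
    static Query indexAndQuery(Document doc, byte[] encodedMin, byte[] encodedMax) {
        // concatenate min and max into the 32-byte value the constructor expects
        byte[] encoded = new byte[BinaryRange.BYTES * 2];
        System.arraycopy(encodedMin, 0, encoded, 0, BinaryRange.BYTES);
        System.arraycopy(encodedMax, 0, encoded, BinaryRange.BYTES, BinaryRange.BYTES);
        doc.add(new BinaryRange("range_field", encoded));
        // at search time, select every indexed range that intersects this encoded range
        return BinaryRange.newIntersectsQuery("range_field", encoded);
    }
}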

PercolateQuery.java

@ -44,7 +44,7 @@ import java.util.Set;
final class PercolateQuery extends Query implements Accountable {
// cost of matching the query against the document, arbitrary as it would be really complex to estimate
public static final float MATCH_COST = 1000;
private static final float MATCH_COST = 1000;
private final QueryStore queryStore;
private final BytesReference documentSource;
@ -164,15 +164,15 @@ final class PercolateQuery extends Query implements Accountable {
};
}
public IndexSearcher getPercolatorIndexSearcher() {
IndexSearcher getPercolatorIndexSearcher() {
return percolatorIndexSearcher;
}
public BytesReference getDocumentSource() {
BytesReference getDocumentSource() {
return documentSource;
}
public QueryStore getQueryStore() {
QueryStore getQueryStore() {
return queryStore;
}

PercolatorFieldMapper.java

@ -18,6 +18,7 @@
*/
package org.elasticsearch.percolator;
import org.apache.lucene.document.BinaryRange;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -25,6 +26,7 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -40,6 +42,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -54,6 +57,8 @@ import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper.RangeType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.BoostingQueryBuilder;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
@ -65,9 +70,11 @@ import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -77,10 +84,10 @@ import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQuery
public class PercolatorFieldMapper extends FieldMapper {
public static final XContentType QUERY_BUILDER_CONTENT_TYPE = XContentType.SMILE;
public static final Setting<Boolean> INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING =
static final XContentType QUERY_BUILDER_CONTENT_TYPE = XContentType.SMILE;
static final Setting<Boolean> INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING =
Setting.boolSetting("index.percolator.map_unmapped_fields_as_string", false, Setting.Property.IndexScope);
public static final String CONTENT_TYPE = "percolator";
static final String CONTENT_TYPE = "percolator";
private static final FieldType FIELD_TYPE = new FieldType();
static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
@ -88,15 +95,16 @@ public class PercolatorFieldMapper extends FieldMapper {
static final String EXTRACTION_PARTIAL = "partial";
static final String EXTRACTION_FAILED = "failed";
public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
static final String RANGE_FIELD_NAME = "range_field";
public static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
private final Supplier<QueryShardContext> queryShardContext;
public Builder(String fieldName, Supplier<QueryShardContext> queryShardContext) {
Builder(String fieldName, Supplier<QueryShardContext> queryShardContext) {
super(fieldName, FIELD_TYPE, FIELD_TYPE);
this.queryShardContext = queryShardContext;
}
@ -111,11 +119,15 @@ public class PercolatorFieldMapper extends FieldMapper {
fieldType.extractionResultField = extractionResultField.fieldType();
BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
fieldType.queryBuilderField = queryBuilderField.fieldType();
// Range field is of type ip, because that matches closest with the BinaryRange field. Otherwise we would
// have to introduce a new field type...
RangeFieldMapper rangeFieldMapper = createExtractedRangeFieldBuilder(RANGE_FIELD_NAME, RangeType.IP, context);
fieldType.rangeField = rangeFieldMapper.fieldType();
context.path().remove();
setupFieldType(context);
return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
extractionResultField, queryBuilderField);
extractionResultField, queryBuilderField, rangeFieldMapper);
}
static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
@ -135,9 +147,16 @@ public class PercolatorFieldMapper extends FieldMapper {
return builder.build(context);
}
static RangeFieldMapper createExtractedRangeFieldBuilder(String name, RangeType rangeType, BuilderContext context) {
RangeFieldMapper.Builder builder = new RangeFieldMapper.Builder(name, rangeType, context.indexCreatedVersion());
// For now no doc values, because in processQuery(...) only the Lucene range fields get added:
builder.docValues(false);
return builder.build(context);
}
}
public static class TypeParser implements FieldMapper.TypeParser {
static class TypeParser implements FieldMapper.TypeParser {
@Override
public Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
@ -145,23 +164,26 @@ public class PercolatorFieldMapper extends FieldMapper {
}
}
public static class FieldType extends MappedFieldType {
static class FieldType extends MappedFieldType {
MappedFieldType queryTermsField;
MappedFieldType extractionResultField;
MappedFieldType queryBuilderField;
public FieldType() {
RangeFieldMapper.RangeFieldType rangeField;
FieldType() {
setIndexOptions(IndexOptions.NONE);
setDocValuesType(DocValuesType.NONE);
setStored(false);
}
public FieldType(FieldType ref) {
FieldType(FieldType ref) {
super(ref);
queryTermsField = ref.queryTermsField;
extractionResultField = ref.extractionResultField;
queryBuilderField = ref.queryBuilderField;
rangeField = ref.rangeField;
}
@Override
@ -198,33 +220,49 @@ public class PercolatorFieldMapper extends FieldMapper {
Query createCandidateQuery(IndexReader indexReader) throws IOException {
List<BytesRef> extractedTerms = new ArrayList<>();
Map<String, List<byte[]>> encodedPointValuesByField = new HashMap<>();
LeafReader reader = indexReader.leaves().get(0).reader();
for (FieldInfo info : reader.getFieldInfos()) {
Terms terms = reader.terms(info.name);
if (terms == null) {
continue;
if (terms != null) {
BytesRef fieldBr = new BytesRef(info.name);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(builder.toBytesRef());
}
}
BytesRef fieldBr = new BytesRef(info.name);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(builder.toBytesRef());
if (info.getPointDimensionCount() == 1) { // not != 0 because range fields are not supported
PointValues values = reader.getPointValues(info.name);
List<byte[]> encodedPointValues = new ArrayList<>();
encodedPointValues.add(values.getMinPackedValue().clone());
encodedPointValues.add(values.getMaxPackedValue().clone());
encodedPointValuesByField.put(info.name, encodedPointValues);
}
}
Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
if (extractedTerms.size() != 0) {
builder.add(new TermInSetQuery(queryTermsField.name(), extractedTerms), Occur.SHOULD);
}
// include extractionResultField:failed, because docs with this term have no extractedTermsField
// and otherwise we would fail to return these docs. Docs that failed query term extraction
// always need to be verified by MemoryIndex:
Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));
builder.add(new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED)), Occur.SHOULD);
return new BooleanQuery.Builder()
.add(extractionSuccess, Occur.SHOULD)
.add(extractionFailure, Occur.SHOULD)
.build();
for (Map.Entry<String, List<byte[]>> entry : encodedPointValuesByField.entrySet()) {
String rangeFieldName = entry.getKey();
List<byte[]> encodedPointValues = entry.getValue();
byte[] min = encodedPointValues.get(0);
byte[] max = encodedPointValues.get(1);
Query query = BinaryRange.newIntersectsQuery(rangeField.name(), encodeRange(rangeFieldName, min, max));
builder.add(query, Occur.SHOULD);
}
return builder.build();
}
}
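Schematically, the candidate query assembled above is a disjunction with one clause per signal. A sketch, assuming a document with some extracted terms and a single int point field (the field names and the minPacked/maxPacked parameters are placeholders; encodeRange is the helper added at the bottom of this file):

static Query candidateShape(List<BytesRef> extractedTerms, byte[] minPacked, byte[] maxPacked) {
    BooleanQuery.Builder candidate = new BooleanQuery.Builder();
    // queries whose extracted terms occur in the percolated document
    candidate.add(new TermInSetQuery("query.extracted_terms", extractedTerms), Occur.SHOULD);
    // queries whose extraction failed; these always need MemoryIndex verification
    candidate.add(new TermQuery(new Term("query.extraction_result", "failed")), Occur.SHOULD);
    // queries whose indexed range intersects [min, max] of the document's point values
    candidate.add(BinaryRange.newIntersectsQuery("query.range_field",
            encodeRange("int_field", minPacked, maxPacked)), Occur.SHOULD);
    return candidate.build();
}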
@ -235,17 +273,20 @@ public class PercolatorFieldMapper extends FieldMapper {
private KeywordFieldMapper extractionResultField;
private BinaryFieldMapper queryBuilderField;
public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
private RangeFieldMapper rangeFieldMapper;
PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo,
Supplier<QueryShardContext> queryShardContext,
KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
BinaryFieldMapper queryBuilderField) {
BinaryFieldMapper queryBuilderField, RangeFieldMapper rangeFieldMapper) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
this.queryShardContext = queryShardContext;
this.queryTermsField = queryTermsField;
this.extractionResultField = extractionResultField;
this.queryBuilderField = queryBuilderField;
this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
this.rangeFieldMapper = rangeFieldMapper;
}
@Override
@ -254,9 +295,10 @@ public class PercolatorFieldMapper extends FieldMapper {
KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
RangeFieldMapper rangeFieldMapperUpdated = (RangeFieldMapper) rangeFieldMapper.updateFieldType(fullNameToFieldType);
if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
&& queryBuilderUpdated == queryBuilderField) {
&& queryBuilderUpdated == queryBuilderField && rangeFieldMapperUpdated == rangeFieldMapper) {
return this;
}
if (updated == this) {
@ -265,6 +307,7 @@ public class PercolatorFieldMapper extends FieldMapper {
updated.queryTermsField = queryTermsUpdated;
updated.extractionResultField = extractionResultUpdated;
updated.queryBuilderField = queryBuilderUpdated;
updated.rangeFieldMapper = rangeFieldMapperUpdated;
return updated;
}
@ -310,12 +353,18 @@ public class PercolatorFieldMapper extends FieldMapper {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
return;
}
for (Term term : result.terms) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
for (QueryAnalyzer.QueryExtraction term : result.extractions) {
if (term.term != null) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
} else if (term.range != null) {
byte[] min = term.range.lowerPoint;
byte[] max = term.range.upperPoint;
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max)));
}
}
if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
@ -324,7 +373,7 @@ public class PercolatorFieldMapper extends FieldMapper {
}
}
public static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException {
static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException {
return toQuery(context, mapUnmappedFieldsAsString, parseQueryBuilder(parser, parser.getTokenLocation()));
}
@ -356,7 +405,7 @@ public class PercolatorFieldMapper extends FieldMapper {
@Override
public Iterator<Mapper> iterator() {
return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField).iterator();
return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField, rangeFieldMapper).iterator();
}
@Override
@ -369,7 +418,6 @@ public class PercolatorFieldMapper extends FieldMapper {
return CONTENT_TYPE;
}
/**
* Fails if a percolator contains an unsupported query. The following queries are not supported:
* 1) a has_child query
@ -405,4 +453,24 @@ public class PercolatorFieldMapper extends FieldMapper {
}
}
static byte[] encodeRange(String rangeFieldName, byte[] minEncoded, byte[] maxEncoded) {
assert minEncoded.length == maxEncoded.length;
byte[] bytes = new byte[BinaryRange.BYTES * 2];
// First compute hash for field name and write the full hash into the byte array
BytesRef fieldAsBytesRef = new BytesRef(rangeFieldName);
MurmurHash3.Hash128 hash = new MurmurHash3.Hash128();
MurmurHash3.hash128(fieldAsBytesRef.bytes, fieldAsBytesRef.offset, fieldAsBytesRef.length, 0, hash);
ByteBuffer bb = ByteBuffer.wrap(bytes);
bb.putLong(hash.h1).putLong(hash.h2).putLong(hash.h1).putLong(hash.h2);
assert bb.position() == bb.limit();
// Secondly, overwrite the min and max encoded values in the byte array
// This way we are able to reuse as much as possible from the hash for any range type.
int offset = BinaryRange.BYTES - minEncoded.length;
System.arraycopy(minEncoded, 0, bytes, offset, minEncoded.length);
System.arraycopy(maxEncoded, 0, bytes, BinaryRange.BYTES + offset, maxEncoded.length);
return bytes;
}
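For reference, the 32-byte value produced above has this layout for a 4-byte (int) encoded range; the offsets follow directly from the arithmetic in encodeRange(...):

// bytes  0..15 : hash.h1 | hash.h2 of the field name (the "min" half)
// bytes 12..15 :   overwritten with the encoded min value (offset = 16 - 4)
// bytes 16..31 : hash.h1 | hash.h2 again (the "max" half)
// bytes 28..31 :   overwritten with the encoded max value
//
// Because the leading bytes of both halves are the field-name hash, ranges on
// different fields are extremely unlikely to intersect, while ranges on the
// same field compare by their encoded values.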
}

PercolatorHighlightSubFetchPhase.java

@ -50,7 +50,7 @@ import java.util.Map;
* Highlighting in the case of the percolate query is a bit different, because the PercolateQuery itself doesn't get highlighted,
* but the source of the PercolateQuery gets highlighted by each hit containing a query.
*/
public final class PercolatorHighlightSubFetchPhase extends HighlightPhase {
final class PercolatorHighlightSubFetchPhase extends HighlightPhase {
PercolatorHighlightSubFetchPhase(Settings settings, Map<String, Highlighter> highlighters) {
super(settings, highlighters);

QueryAnalyzer.java

@ -18,6 +18,7 @@
*/
package org.elasticsearch.percolator;
import org.apache.lucene.document.BinaryRange;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
@ -30,6 +31,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermInSetQuery;
@ -41,6 +43,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
@ -51,10 +54,13 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
public final class QueryAnalyzer {
import static java.util.stream.Collectors.toSet;
final class QueryAnalyzer {
private static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
@ -78,6 +84,7 @@ public final class QueryAnalyzer {
map.put(DisjunctionMaxQuery.class, disjunctionMaxQuery());
map.put(SynonymQuery.class, synonymQuery());
map.put(FunctionScoreQuery.class, functionScoreQuery());
map.put(PointRangeQuery.class, pointRangeQuery());
queryProcessors = Collections.unmodifiableMap(map);
}
@ -85,7 +92,7 @@ public final class QueryAnalyzer {
}
/**
* Extracts terms from the provided query. These terms are stored with the percolator query and
* Extracts terms and ranges from the provided query. These terms and ranges are stored with the percolator query and
* used by the percolate query's candidate query as fields to query by. The candidate query
* holds the terms from the document to be percolated and allows the percolate query to ignore
* percolator queries that we know would otherwise never match.
@ -104,11 +111,11 @@ public final class QueryAnalyzer {
* since those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored.
*
* <p>
* Sometimes the query analyzer can't always extract terms from a sub query, if that happens then
* Sometimes the query analyzer can't extract terms or ranges from a sub query; if that happens then
* query analysis is stopped and an UnsupportedQueryException is thrown, so that the caller can mark
* this query in such a way that the PercolatorQuery always verifies this query with the MemoryIndex.
*/
public static Result analyze(Query query) {
static Result analyze(Query query) {
Class queryClass = query.getClass();
if (queryClass.isAnonymousClass()) {
// Sometimes queries have anonymous classes in that case we need the direct super class.
@ -123,65 +130,65 @@ public final class QueryAnalyzer {
}
}
static Function<Query, Result> matchNoDocsQuery() {
private static Function<Query, Result> matchNoDocsQuery() {
return (query -> new Result(true, Collections.emptySet()));
}
static Function<Query, Result> constantScoreQuery() {
private static Function<Query, Result> constantScoreQuery() {
return query -> {
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
return analyze(wrappedQuery);
};
}
static Function<Query, Result> boostQuery() {
private static Function<Query, Result> boostQuery() {
return query -> {
Query wrappedQuery = ((BoostQuery) query).getQuery();
return analyze(wrappedQuery);
};
}
static Function<Query, Result> termQuery() {
private static Function<Query, Result> termQuery() {
return (query -> {
TermQuery termQuery = (TermQuery) query;
return new Result(true, Collections.singleton(termQuery.getTerm()));
return new Result(true, Collections.singleton(new QueryExtraction(termQuery.getTerm())));
});
}
static Function<Query, Result> termInSetQuery() {
private static Function<Query, Result> termInSetQuery() {
return query -> {
TermInSetQuery termInSetQuery = (TermInSetQuery) query;
Set<Term> terms = new HashSet<>();
Set<QueryExtraction> terms = new HashSet<>();
PrefixCodedTerms.TermIterator iterator = termInSetQuery.getTermData().iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
terms.add(new Term(iterator.field(), term));
terms.add(new QueryExtraction(new Term(iterator.field(), term)));
}
return new Result(true, terms);
};
}
static Function<Query, Result> synonymQuery() {
private static Function<Query, Result> synonymQuery() {
return query -> {
Set<Term> terms = new HashSet<>(((SynonymQuery) query).getTerms());
Set<QueryExtraction> terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
return new Result(true, terms);
};
}
static Function<Query, Result> commonTermsQuery() {
private static Function<Query, Result> commonTermsQuery() {
return query -> {
List<Term> terms = ((CommonTermsQuery) query).getTerms();
return new Result(false, new HashSet<>(terms));
Set<QueryExtraction> terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
return new Result(false, terms);
};
}
static Function<Query, Result> blendedTermQuery() {
private static Function<Query, Result> blendedTermQuery() {
return query -> {
List<Term> terms = ((BlendedTermQuery) query).getTerms();
return new Result(true, new HashSet<>(terms));
Set<QueryExtraction> terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
return new Result(true, terms);
};
}
static Function<Query, Result> phraseQuery() {
private static Function<Query, Result> phraseQuery() {
return query -> {
Term[] terms = ((PhraseQuery) query).getTerms();
if (terms.length == 0) {
@ -196,70 +203,71 @@ public final class QueryAnalyzer {
longestTerm = term;
}
}
return new Result(false, Collections.singleton(longestTerm));
return new Result(false, Collections.singleton(new QueryExtraction(longestTerm)));
};
}
static Function<Query, Result> multiPhraseQuery() {
private static Function<Query, Result> multiPhraseQuery() {
return query -> {
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays();
if (terms.length == 0) {
return new Result(true, Collections.emptySet());
}
Set<Term> bestTermArr = null;
Set<QueryExtraction> bestTermArr = null;
for (Term[] termArr : terms) {
bestTermArr = selectTermListWithTheLongestShortestTerm(bestTermArr, new HashSet<>(Arrays.asList(termArr)));
Set<QueryExtraction> queryExtractions = Arrays.stream(termArr).map(QueryExtraction::new).collect(toSet());
bestTermArr = selectBestExtraction(bestTermArr, queryExtractions);
}
return new Result(false, bestTermArr);
};
}
static Function<Query, Result> spanTermQuery() {
private static Function<Query, Result> spanTermQuery() {
return query -> {
Term term = ((SpanTermQuery) query).getTerm();
return new Result(true, Collections.singleton(term));
return new Result(true, Collections.singleton(new QueryExtraction(term)));
};
}
static Function<Query, Result> spanNearQuery() {
private static Function<Query, Result> spanNearQuery() {
return query -> {
Set<Term> bestClauses = null;
Set<QueryExtraction> bestClauses = null;
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
for (SpanQuery clause : spanNearQuery.getClauses()) {
Result temp = analyze(clause);
bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
bestClauses = selectBestExtraction(temp.extractions, bestClauses);
}
return new Result(false, bestClauses);
};
}
static Function<Query, Result> spanOrQuery() {
private static Function<Query, Result> spanOrQuery() {
return query -> {
Set<Term> terms = new HashSet<>();
Set<QueryExtraction> terms = new HashSet<>();
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
for (SpanQuery clause : spanOrQuery.getClauses()) {
terms.addAll(analyze(clause).terms);
terms.addAll(analyze(clause).extractions);
}
return new Result(false, terms);
};
}
static Function<Query, Result> spanNotQuery() {
private static Function<Query, Result> spanNotQuery() {
return query -> {
Result result = analyze(((SpanNotQuery) query).getInclude());
return new Result(false, result.terms);
return new Result(false, result.extractions);
};
}
static Function<Query, Result> spanFirstQuery() {
private static Function<Query, Result> spanFirstQuery() {
return query -> {
Result result = analyze(((SpanFirstQuery) query).getMatch());
return new Result(false, result.terms);
return new Result(false, result.extractions);
};
}
static Function<Query, Result> booleanQuery() {
private static Function<Query, Result> booleanQuery() {
return query -> {
BooleanQuery bq = (BooleanQuery) query;
List<BooleanClause> clauses = bq.clauses();
@ -279,7 +287,7 @@ public final class QueryAnalyzer {
}
}
if (numRequiredClauses > 0) {
Set<Term> bestClause = null;
Set<QueryExtraction> bestClause = null;
UnsupportedQueryException uqe = null;
for (BooleanClause clause : clauses) {
if (clause.isRequired() == false) {
@ -296,7 +304,7 @@ public final class QueryAnalyzer {
uqe = e;
continue;
}
bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause);
bestClause = selectBestExtraction(temp.extractions, bestClause);
}
if (bestClause != null) {
return new Result(false, bestClause);
@ -321,14 +329,14 @@ public final class QueryAnalyzer {
};
}
static Function<Query, Result> disjunctionMaxQuery() {
private static Function<Query, Result> disjunctionMaxQuery() {
return query -> {
List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
return handleDisjunction(disjuncts, 1, false);
};
}
static Function<Query, Result> functionScoreQuery() {
private static Function<Query, Result> functionScoreQuery() {
return query -> {
FunctionScoreQuery functionScoreQuery = (FunctionScoreQuery) query;
Result result = analyze(functionScoreQuery.getSubQuery());
@ -337,60 +345,178 @@ public final class QueryAnalyzer {
// (if it matches with the percolator document matches with the extracted terms.
// Min score filters out docs, which is different than the functions, which just influences the score.)
boolean verified = functionScoreQuery.getMinScore() == null;
return new Result(verified, result.terms);
return new Result(verified, result.extractions);
};
}
static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
private static Function<Query, Result> pointRangeQuery() {
return query -> {
PointRangeQuery pointRangeQuery = (PointRangeQuery) query;
byte[] lowerPoint = pointRangeQuery.getLowerPoint();
byte[] upperPoint = pointRangeQuery.getUpperPoint();
byte[] interval = new byte[16];
NumericUtils.subtract(16, 0, prepad(upperPoint), prepad(lowerPoint), interval);
return new Result(false, Collections.singleton(new QueryExtraction(
new Range(pointRangeQuery.getField(), lowerPoint, upperPoint, interval))
));
};
}
private static byte[] prepad(byte[] original) {
int offset = BinaryRange.BYTES - original.length;
byte[] result = new byte[BinaryRange.BYTES];
System.arraycopy(original, 0, result, offset, original.length);
return result;
}
private static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
Set<Term> terms = new HashSet<>();
Set<QueryExtraction> terms = new HashSet<>();
for (Query disjunct : disjunctions) {
Result subResult = analyze(disjunct);
if (subResult.verified == false) {
verified = false;
}
terms.addAll(subResult.terms);
terms.addAll(subResult.extractions);
}
return new Result(verified, terms);
}
static Set<Term> selectTermListWithTheLongestShortestTerm(Set<Term> terms1, Set<Term> terms2) {
if (terms1 == null) {
return terms2;
} else if (terms2 == null) {
return terms1;
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
assert extractions1 != null || extractions2 != null;
if (extractions1 == null) {
return extractions2;
} else if (extractions2 == null) {
return extractions1;
} else {
int terms1ShortestTerm = minTermLength(terms1);
int terms2ShortestTerm = minTermLength(terms2);
// keep the clause with longest terms, this likely to be rarest.
if (terms1ShortestTerm >= terms2ShortestTerm) {
return terms1;
// Prefer term based extractions over range based extractions:
boolean onlyRangeBasedExtractions = true;
for (QueryExtraction clause : extractions1) {
if (clause.term != null) {
onlyRangeBasedExtractions = false;
break;
}
}
for (QueryExtraction clause : extractions2) {
if (clause.term != null) {
onlyRangeBasedExtractions = false;
break;
}
}
if (onlyRangeBasedExtractions) {
BytesRef terms1SmallestRange = smallestRange(extractions1);
BytesRef terms2SmallestRange = smallestRange(extractions2);
// Keep the clause with the smallest range; this is likely to be the rarest.
if (terms1SmallestRange.compareTo(terms2SmallestRange) <= 0) {
return extractions1;
} else {
return extractions2;
}
} else {
return terms2;
int terms1ShortestTerm = minTermLength(extractions1);
int terms2ShortestTerm = minTermLength(extractions2);
// keep the clause with the longest terms, as these are likely to be the rarest.
if (terms1ShortestTerm >= terms2ShortestTerm) {
return extractions1;
} else {
return extractions2;
}
}
}
}
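Concretely, a sketch of the selection behaviour (runnable only from within this package, since the types are package-private; the field names and values are made up):

byte[] lower = new byte[4], upper = new byte[4], interval = new byte[16];
QueryAnalyzer.Range range = new QueryAnalyzer.Range("int_field", lower, upper, interval);
Set<QueryAnalyzer.QueryExtraction> terms =
        Collections.singleton(new QueryAnalyzer.QueryExtraction(new Term("field", "abcdef")));
Set<QueryAnalyzer.QueryExtraction> ranges =
        Collections.singleton(new QueryAnalyzer.QueryExtraction(range));
// a set containing at least one term beats a ranges-only set, because
// minTermLength(...) returns Integer.MIN_VALUE for ranges-only sets
assert QueryAnalyzer.selectBestExtraction(terms, ranges) == terms;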
static int minTermLength(Set<Term> terms) {
private static int minTermLength(Set<QueryExtraction> extractions) {
// In case there are only range extractions, we return Integer.MIN_VALUE,
// so that selectBestExtraction(...) is likely to prefer the set that contains at least a single term-based extraction
if (extractions.stream().filter(queryExtraction -> queryExtraction.term != null).count() == 0 &&
extractions.stream().filter(queryExtraction -> queryExtraction.range != null).count() > 0) {
return Integer.MIN_VALUE;
}
int min = Integer.MAX_VALUE;
for (Term term : terms) {
min = Math.min(min, term.bytes().length);
for (QueryExtraction qt : extractions) {
if (qt.term != null) {
min = Math.min(min, qt.bytes().length);
}
}
return min;
}
private static BytesRef smallestRange(Set<QueryExtraction> terms) {
BytesRef min = terms.iterator().next().range.interval;
for (QueryExtraction qt : terms) {
if (qt.range != null) {
if (qt.range.interval.compareTo(min) < 0) {
min = qt.range.interval;
}
}
}
return min;
}
static class Result {
final Set<Term> terms;
final Set<QueryExtraction> extractions;
final boolean verified;
Result(boolean verified, Set<Term> terms) {
this.terms = terms;
Result(boolean verified, Set<QueryExtraction> extractions) {
this.extractions = extractions;
this.verified = verified;
}
}
static class QueryExtraction {
final Term term;
final Range range;
QueryExtraction(Term term) {
this.term = term;
this.range = null;
}
QueryExtraction(Range range) {
this.term = null;
this.range = range;
}
String field() {
return term != null ? term.field() : null;
}
BytesRef bytes() {
return term != null ? term.bytes() : null;
}
String text() {
return term != null ? term.text() : null;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
QueryExtraction queryExtraction = (QueryExtraction) o;
return Objects.equals(term, queryExtraction.term) &&
Objects.equals(range, queryExtraction.range);
}
@Override
public int hashCode() {
return Objects.hash(term, range);
}
@Override
public String toString() {
return "QueryExtraction{" +
"term=" + term +
",range=" + range +
'}';
}
}
/**
* Exception indicating that none or only some query terms could be extracted from a percolator query.
*/
@ -406,9 +532,52 @@ public final class QueryAnalyzer {
/**
* The actual Lucene query that was unsupported and caused this exception to be thrown.
*/
public Query getUnsupportedQuery() {
Query getUnsupportedQuery() {
return unsupportedQuery;
}
}
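A minimal sketch of the caller's side of this contract (mirroring how processQuery(...) in PercolatorFieldMapper reacts; the comments describe that behaviour rather than new API):

try {
    QueryAnalyzer.Result result = QueryAnalyzer.analyze(query);
    // index result.extractions as terms and BinaryRange fields, and mark the
    // document "complete" or "partial" depending on result.verified
} catch (QueryAnalyzer.UnsupportedQueryException e) {
    // mark the query "failed" so the candidate query always selects it and the
    // MemoryIndex makes the final decision at percolate time
}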
static class Range {
final String fieldName;
final byte[] lowerPoint;
final byte[] upperPoint;
final BytesRef interval;
Range(String fieldName, byte[] lowerPoint, byte[] upperPoint, byte[] interval) {
this.fieldName = fieldName;
this.lowerPoint = lowerPoint;
this.upperPoint = upperPoint;
// using BytesRef here just to make use of its compareTo method.
this.interval = new BytesRef(interval);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Range range = (Range) o;
return Objects.equals(fieldName, range.fieldName) &&
Arrays.equals(lowerPoint, range.lowerPoint) &&
Arrays.equals(upperPoint, range.upperPoint);
}
@Override
public int hashCode() {
int result = 1;
result += 31 * fieldName.hashCode();
result += Arrays.hashCode(lowerPoint);
result += Arrays.hashCode(upperPoint);
return result;
}
@Override
public String toString() {
return "Range{" +
"fieldName='" + fieldName + '\'' +
", interval=" + interval +
'}';
}
}
}
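Putting it together, a sketch of what the analyzer extracts from a conjunction of a term query and a range query (package-private API, so this assumes the caller sits in org.elasticsearch.percolator; field names are made up):

Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST)
        .add(IntPoint.newRangeQuery("int_field", 0, 10), BooleanClause.Occur.MUST)
        .build();
QueryAnalyzer.Result result = QueryAnalyzer.analyze(q);
// the conjunction keeps only its best clause: the term extraction beats the
// range extraction, so result.extractions holds just field:value (verified == false)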

CandidateQueryTests.java

@ -21,7 +21,12 @@ package org.elasticsearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@ -81,6 +86,7 @@ import java.util.List;
import java.util.Set;
import java.util.function.Function;
import static org.elasticsearch.common.network.InetAddresses.forString;
import static org.hamcrest.Matchers.equalTo;
public class CandidateQueryTests extends ESSingleNodeTestCase {
@ -287,6 +293,174 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
duelRun(queryStore, memoryIndex, shardSearcher);
}
public void testRangeQueries() throws Exception {
List<ParseContext.Document> docs = new ArrayList<>();
addQuery(IntPoint.newRangeQuery("int_field", 0, 5), docs);
addQuery(LongPoint.newRangeQuery("long_field", 5L, 10L), docs);
addQuery(HalfFloatPoint.newRangeQuery("half_float_field", 10, 15), docs);
addQuery(FloatPoint.newRangeQuery("float_field", 15, 20), docs);
addQuery(DoublePoint.newRangeQuery("double_field", 20, 25), docs);
addQuery(InetAddressPoint.newRangeQuery("ip_field", forString("192.168.0.1"), forString("192.168.0.10")), docs);
indexWriter.addDocuments(docs);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
shardSearcher.setQueryCache(null);
MemoryIndex memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new IntPoint("int_field", 3)), new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
Query query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
TopDocs topDocs = shardSearcher.search(query, 1);
assertEquals(1L, topDocs.totalHits);
assertEquals(1, topDocs.scoreDocs.length);
assertEquals(0, topDocs.scoreDocs[0].doc);
memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new LongPoint("long_field", 7L)), new WhitespaceAnalyzer());
percolateSearcher = memoryIndex.createSearcher();
query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
topDocs = shardSearcher.search(query, 1);
assertEquals(1L, topDocs.totalHits);
assertEquals(1, topDocs.scoreDocs.length);
assertEquals(1, topDocs.scoreDocs[0].doc);
memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new HalfFloatPoint("half_float_field", 12)),
new WhitespaceAnalyzer());
percolateSearcher = memoryIndex.createSearcher();
query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
topDocs = shardSearcher.search(query, 1);
assertEquals(1L, topDocs.totalHits);
assertEquals(1, topDocs.scoreDocs.length);
assertEquals(2, topDocs.scoreDocs[0].doc);
memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new FloatPoint("float_field", 17)), new WhitespaceAnalyzer());
percolateSearcher = memoryIndex.createSearcher();
query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
topDocs = shardSearcher.search(query, 1);
assertEquals(1, topDocs.totalHits);
assertEquals(1, topDocs.scoreDocs.length);
assertEquals(3, topDocs.scoreDocs[0].doc);
memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new DoublePoint("double_field", 21)), new WhitespaceAnalyzer());
percolateSearcher = memoryIndex.createSearcher();
query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
topDocs = shardSearcher.search(query, 1);
assertEquals(1, topDocs.totalHits);
assertEquals(1, topDocs.scoreDocs.length);
assertEquals(4, topDocs.scoreDocs[0].doc);
memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new InetAddressPoint("ip_field",
forString("192.168.0.4"))), new WhitespaceAnalyzer());
percolateSearcher = memoryIndex.createSearcher();
query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
topDocs = shardSearcher.search(query, 1);
assertEquals(1, topDocs.totalHits);
assertEquals(1, topDocs.scoreDocs.length);
assertEquals(5, topDocs.scoreDocs[0].doc);
}
public void testDuelRangeQueries() throws Exception {
List<ParseContext.Document> documents = new ArrayList<>();
int lowerInt = randomIntBetween(0, 256);
int upperInt = lowerInt + randomIntBetween(0, 32);
addQuery(IntPoint.newRangeQuery("int_field", lowerInt, upperInt), documents);
long lowerLong = randomIntBetween(0, 256);
long upperLong = lowerLong + randomIntBetween(0, 32);
addQuery(LongPoint.newRangeQuery("long_field", lowerLong, upperLong), documents);
float lowerHalfFloat = randomIntBetween(0, 256);
float upperHalfFloat = lowerHalfFloat + randomIntBetween(0, 32);
addQuery(HalfFloatPoint.newRangeQuery("half_float_field", lowerHalfFloat, upperHalfFloat), documents);
float lowerFloat = randomIntBetween(0, 256);
float upperFloat = lowerFloat + randomIntBetween(0, 32);
addQuery(FloatPoint.newRangeQuery("float_field", lowerFloat, upperFloat), documents);
double lowerDouble = randomDoubleBetween(0, 256, true);
double upperDouble = lowerDouble + randomDoubleBetween(0, 32, true);
addQuery(DoublePoint.newRangeQuery("double_field", lowerDouble, upperDouble), documents);
int lowerIpPart = randomIntBetween(0, 255);
int upperIpPart = randomIntBetween(lowerIpPart, 255);
addQuery(InetAddressPoint.newRangeQuery("ip_field", forString("192.168.1." + lowerIpPart),
forString("192.168.1." + upperIpPart)), documents);
indexWriter.addDocuments(documents);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
int randomInt = randomIntBetween(lowerInt, upperInt);
Iterable<? extends IndexableField> doc = Collections.singleton(new IntPoint("int_field", randomInt));
MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
TopDocs result = executeQuery(queryStore, memoryIndex, shardSearcher);
assertThat(result.scoreDocs.length, equalTo(1));
assertThat(result.scoreDocs[0].doc, equalTo(0));
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new IntPoint("int_field", randomInt()));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
long randomLong = randomIntBetween((int) lowerLong, (int) upperLong);
doc = Collections.singleton(new LongPoint("long_field", randomLong));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
result = executeQuery(queryStore, memoryIndex, shardSearcher);
assertThat(result.scoreDocs.length, equalTo(1));
assertThat(result.scoreDocs[0].doc, equalTo(1));
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new LongPoint("long_field", randomLong()));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
float randomHalfFloat = randomIntBetween((int) lowerHalfFloat, (int) upperHalfFloat);
doc = Collections.singleton(new HalfFloatPoint("half_float_field", randomHalfFloat));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
result = executeQuery(queryStore, memoryIndex, shardSearcher);
assertThat(result.scoreDocs.length, equalTo(1));
assertThat(result.scoreDocs[0].doc, equalTo(2));
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new HalfFloatPoint("half_float_field", randomFloat()));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
float randomFloat = randomIntBetween((int) lowerFloat, (int) upperFloat);
doc = Collections.singleton(new FloatPoint("float_field", randomFloat));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
result = executeQuery(queryStore, memoryIndex, shardSearcher);
assertThat(result.scoreDocs.length, equalTo(1));
assertThat(result.scoreDocs[0].doc, equalTo(3));
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new FloatPoint("float_field", randomFloat()));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
double randomDouble = randomDoubleBetween(lowerDouble, upperDouble, true);
doc = Collections.singleton(new DoublePoint("double_field", randomDouble));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
result = executeQuery(queryStore, memoryIndex, shardSearcher);
assertThat(result.scoreDocs.length, equalTo(1));
assertThat(result.scoreDocs[0].doc, equalTo(4));
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new DoublePoint("double_field", randomFloat()));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new InetAddressPoint("ip_field",
forString("192.168.1." + randomIntBetween(lowerIpPart, upperIpPart))));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
result = executeQuery(queryStore, memoryIndex, shardSearcher);
assertThat(result.scoreDocs.length, equalTo(1));
assertThat(result.scoreDocs[0].doc, equalTo(5));
duelRun(queryStore, memoryIndex, shardSearcher);
doc = Collections.singleton(new InetAddressPoint("ip_field",
forString("192.168.1." + randomIntBetween(0, 255))));
memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
}
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
boolean requireScore = randomBoolean();
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
@ -319,6 +493,14 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
queries.add(query);
}
private TopDocs executeQuery(PercolateQuery.QueryStore queryStore,
MemoryIndex memoryIndex,
IndexSearcher shardSearcher) throws IOException {
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
Query percolateQuery = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher);
return shardSearcher.search(percolateQuery, 10);
}
private static final class CustomQuery extends Query {
private final Term term;

PercolatorFieldMapperTests.java

@ -20,6 +20,11 @@
package org.elasticsearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
@ -38,6 +43,8 @@ import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -77,6 +84,7 @@ import org.elasticsearch.test.InternalSettingsPlugin;
import org.junit.Before;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -128,7 +136,13 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.startObject("field2").field("type", "text").endObject()
.startObject("_field3").field("type", "text").endObject()
.startObject("field4").field("type", "text").endObject()
.startObject("number_field").field("type", "long").endObject()
.startObject("number_field1").field("type", "integer").endObject()
.startObject("number_field2").field("type", "long").endObject()
.startObject("number_field3").field("type", "long").endObject()
.startObject("number_field4").field("type", "half_float").endObject()
.startObject("number_field5").field("type", "float").endObject()
.startObject("number_field6").field("type", "double").endObject()
.startObject("number_field7").field("type", "ip").endObject()
.startObject("date_field").field("type", "date").endObject()
.endObject().endObject().endObject().string();
mapperService.merge("doc", new CompressedXContent(mapper), MapperService.MergeReason.MAPPING_UPDATE, false);
@ -206,12 +220,12 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
memoryIndex.addField(new LongPoint("number_field2", 10L), new WhitespaceAnalyzer());
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
assertEquals(2, candidateQuery.clauses().size());
assertEquals(3, candidateQuery.clauses().size());
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery();
@ -236,6 +250,54 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)),
candidateQuery.clauses().get(1).getQuery());
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(2).getOccur());
assertThat(candidateQuery.clauses().get(2).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:"));
}
public void testCreateCandidateQuery_numberFields() throws Exception {
addQueryFieldMappings();
MemoryIndex memoryIndex = new MemoryIndex(false);
memoryIndex.addField(new IntPoint("number_field1", 10), new WhitespaceAnalyzer());
memoryIndex.addField(new LongPoint("number_field2", 20L), new WhitespaceAnalyzer());
memoryIndex.addField(new LongPoint("number_field3", 30L), new WhitespaceAnalyzer());
memoryIndex.addField(new HalfFloatPoint("number_field4", 30f), new WhitespaceAnalyzer());
memoryIndex.addField(new FloatPoint("number_field5", 40f), new WhitespaceAnalyzer());
memoryIndex.addField(new DoublePoint("number_field6", 50f), new WhitespaceAnalyzer());
memoryIndex.addField(new InetAddressPoint("number_field7", InetAddresses.forString("192.168.1.12")), new WhitespaceAnalyzer());
memoryIndex.addField(new InetAddressPoint("number_field7", InetAddresses.forString("192.168.1.20")), new WhitespaceAnalyzer());
memoryIndex.addField(new InetAddressPoint("number_field7", InetAddresses.forString("192.168.1.24")), new WhitespaceAnalyzer());
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
assertEquals(8, candidateQuery.clauses().size());
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)),
candidateQuery.clauses().get(0).getQuery());
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
assertThat(candidateQuery.clauses().get(1).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(2).getOccur());
assertThat(candidateQuery.clauses().get(2).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(3).getOccur());
assertThat(candidateQuery.clauses().get(3).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(4).getOccur());
assertThat(candidateQuery.clauses().get(4).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(5).getOccur());
assertThat(candidateQuery.clauses().get(5).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(6).getOccur());
assertThat(candidateQuery.clauses().get(6).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(7).getOccur());
assertThat(candidateQuery.clauses().get(7).getQuery().toString(), containsString(fieldName + ".range_field:<ranges:[["));
}
private void assertTermIterator(PrefixCodedTerms.TermIterator termIterator, String expectedValue, String expectedField) {
@ -283,7 +345,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
addQueryFieldMappings();
QueryBuilder[] queries = new QueryBuilder[]{
termQuery("field", "value"), matchAllQuery(), matchQuery("field", "value"), matchPhraseQuery("field", "value"),
prefixQuery("field", "v"), wildcardQuery("field", "v*"), rangeQuery("number_field").gte(0).lte(9),
prefixQuery("field", "v"), wildcardQuery("field", "v*"), rangeQuery("number_field2").gte(0).lte(9),
rangeQuery("date_field").from("2015-01-01T00:00").to("2015-01-01T00:00")
};
// note: it is important that range queries never rewrite, otherwise it will cause results to be wrong.
@ -551,6 +613,72 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
((List) XContentMapValues.extractValue("function_score.functions.script_score.script.lang", parsedQuery)).get(0));
}
public void testEncodeRange() {
int iters = randomIntBetween(32, 256);
for (int i = 0; i < iters; i++) {
int encodingType = randomInt(1);
final int randomFrom = randomInt();
final byte[] encodedFrom;
switch (encodingType) {
case 0:
encodedFrom = new byte[Integer.BYTES];
IntPoint.encodeDimension(randomFrom, encodedFrom, 0);
break;
case 1:
encodedFrom = new byte[Long.BYTES];
LongPoint.encodeDimension(randomFrom, encodedFrom, 0);
break;
default:
throw new AssertionError("unexpected encoding type [" + encodingType + "]");
}
final int randomTo = randomIntBetween(randomFrom, Integer.MAX_VALUE);
final byte[] encodedTo;
switch (encodingType) {
case 0:
encodedTo = new byte[Integer.BYTES];
IntPoint.encodeDimension(randomTo, encodedTo, 0);
break;
case 1:
encodedTo = new byte[Long.BYTES];
LongPoint.encodeDimension(randomTo, encodedTo, 0);
break;
default:
throw new AssertionError("unexpected encoding type [" + encodingType + "]");
}
String fieldName = randomAlphaOfLength(5);
byte[] result = PercolatorFieldMapper.encodeRange(fieldName, encodedFrom, encodedTo);
assertEquals(32, result.length);
BytesRef fieldAsBytesRef = new BytesRef(fieldName);
MurmurHash3.Hash128 hash = new MurmurHash3.Hash128();
MurmurHash3.hash128(fieldAsBytesRef.bytes, fieldAsBytesRef.offset, fieldAsBytesRef.length, 0, hash);
switch (encodingType) {
case 0:
assertEquals(hash.h1, ByteBuffer.wrap(subByteArray(result, 0, 8)).getLong());
assertEquals(randomFrom, IntPoint.decodeDimension(subByteArray(result, 12, 4), 0));
assertEquals(hash.h1, ByteBuffer.wrap(subByteArray(result, 16, 8)).getLong());
assertEquals(randomTo, IntPoint.decodeDimension(subByteArray(result, 28, 4), 0));
break;
case 1:
assertEquals(hash.h1, ByteBuffer.wrap(subByteArray(result, 0, 8)).getLong());
assertEquals(randomFrom, LongPoint.decodeDimension(subByteArray(result, 8, 8), 0));
assertEquals(hash.h1, ByteBuffer.wrap(subByteArray(result, 16, 8)).getLong());
assertEquals(randomTo, LongPoint.decodeDimension(subByteArray(result, 24, 8), 0));
break;
default:
throw new AssertionError("unexpected encoding type [" + encodingType + "]");
}
}
}
private static byte[] subByteArray(byte[] source, int offset, int length) {
return Arrays.copyOfRange(source, offset, offset + length);
}
// Just so that we store scripts in percolator queries, but not really execute these scripts.
public static class FoolMeScriptPlugin extends MockScriptPlugin {

@@ -18,6 +18,12 @@
*/
package org.elasticsearch.percolator;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.CommonTermsQuery;
@@ -30,6 +36,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
@@ -42,19 +49,23 @@ import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.RandomScoreFunction;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.percolator.QueryAnalyzer.QueryExtraction;
import org.elasticsearch.percolator.QueryAnalyzer.Result;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import static org.elasticsearch.percolator.QueryAnalyzer.UnsupportedQueryException;
import static org.elasticsearch.percolator.QueryAnalyzer.analyze;
import static org.elasticsearch.percolator.QueryAnalyzer.selectTermListWithTheLongestShortestTerm;
import static org.elasticsearch.percolator.QueryAnalyzer.selectBestExtraction;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
@@ -65,7 +76,7 @@ public class QueryAnalyzerTests extends ESTestCase {
TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
Result result = analyze(termQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
@@ -75,8 +86,8 @@ public class QueryAnalyzerTests extends ESTestCase {
TermInSetQuery termsQuery = new TermInSetQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
Result result = analyze(termsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
assertThat(terms.get(0).text(), equalTo("_term1"));
@@ -88,7 +99,7 @@ public class QueryAnalyzerTests extends ESTestCase {
PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
Result result = analyze(phraseQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
@@ -96,14 +107,14 @@ public class QueryAnalyzerTests extends ESTestCase {
public void testExtractQueryMetadata_multiPhraseQuery() {
MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery.Builder()
.add(new Term("_field", "_long_term"))
.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")})
.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")})
.add(new Term[] {new Term("_field", "_very_long_term")})
.build();
.add(new Term("_field", "_long_term"))
.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")})
.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")})
.add(new Term[] {new Term("_field", "_very_long_term")})
.build();
Result result = analyze(multiPhraseQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo("_field"));
assertThat(terms.get(0).bytes().utf8ToString(), equalTo("_very_long_term"));
@@ -126,8 +137,8 @@ public class QueryAnalyzerTests extends ESTestCase {
BooleanQuery booleanQuery = builder.build();
Result result = analyze(booleanQuery);
assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(3));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -154,8 +165,8 @@ public class QueryAnalyzerTests extends ESTestCase {
BooleanQuery booleanQuery = builder.build();
Result result = analyze(booleanQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
List<QueryAnalyzer.QueryExtraction> terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -177,7 +188,7 @@ public class QueryAnalyzerTests extends ESTestCase {
BooleanQuery booleanQuery = builder.build();
Result result = analyze(booleanQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
@@ -242,7 +253,7 @@ public class QueryAnalyzerTests extends ESTestCase {
ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
Result result = analyze(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -253,7 +264,7 @@ public class QueryAnalyzerTests extends ESTestCase {
BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f);
Result result = analyze(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -265,8 +276,8 @@ public class QueryAnalyzerTests extends ESTestCase {
commonTermsQuery.add(new Term("_field", "_term2"));
Result result = analyze(commonTermsQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
List<QueryExtraction> terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
assertThat(terms.get(0).text(), equalTo("_term1"));
@@ -279,8 +290,8 @@ public class QueryAnalyzerTests extends ESTestCase {
BlendedTermQuery commonTermsQuery = BlendedTermQuery.dismaxBlendedQuery(termsArr, 1.0f);
Result result = analyze(commonTermsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
List<QueryAnalyzer.QueryExtraction> terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
assertThat(terms.get(0).text(), equalTo("_term1"));
@@ -303,18 +314,18 @@ public class QueryAnalyzerTests extends ESTestCase {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
Result result = analyze(spanTermQuery1);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
assertTermsEqual(result.extractions, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_spanNearQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
.addClause(spanTermQuery1).addClause(spanTermQuery2).build();
Result result = analyze(spanNearQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery2.getTerm());
assertTermsEqual(result.extractions, spanTermQuery2.getTerm());
}
public void testExtractQueryMetadata_spanOrQuery() {
@@ -323,7 +334,7 @@ public class QueryAnalyzerTests extends ESTestCase {
SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
Result result = analyze(spanOrQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
assertTermsEqual(result.extractions, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
}
public void testExtractQueryMetadata_spanFirstQuery() {
@@ -331,7 +342,7 @@ public class QueryAnalyzerTests extends ESTestCase {
SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
Result result = analyze(spanFirstQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
assertTermsEqual(result.extractions, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_spanNotQuery() {
@@ -340,35 +351,35 @@ public class QueryAnalyzerTests extends ESTestCase {
SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
Result result = analyze(spanNotQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
assertTermsEqual(result.extractions, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_matchNoDocsQuery() {
Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"));
assertThat(result.verified, is(true));
assertEquals(0, result.terms.size());
assertEquals(0, result.extractions.size());
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
result = analyze(bq.build());
assertThat(result.verified, is(false));
assertEquals(0, result.terms.size());
assertEquals(0, result.extractions.size());
bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
result = analyze(bq.build());
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
assertTermsEqual(result.extractions, new Term("field", "value"));
DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
1f
);
result = analyze(disjunctionMaxQuery);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
assertTermsEqual(result.extractions, new Term("field", "value"));
}
public void testExtractQueryMetadata_matchAllDocsQuery() {
@@ -379,7 +390,7 @@ public class QueryAnalyzerTests extends ESTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
Result result = analyze(builder.build());
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, new Term("field", "value"));
assertTermsEqual(result.extractions, new Term("field", "value"));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
@@ -442,7 +453,7 @@ public class QueryAnalyzerTests extends ESTestCase {
Result result = analyze(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery1.getTerm());
assertTermsEqual(result.extractions, termQuery1.getTerm());
TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
builder = new BooleanQuery.Builder();
@@ -452,7 +463,7 @@ public class QueryAnalyzerTests extends ESTestCase {
bq1 = builder.build();
result = analyze(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery2.getTerm());
assertTermsEqual(result.extractions, termQuery2.getTerm());
builder = new BooleanQuery.Builder();
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
@@ -468,13 +479,13 @@ public class QueryAnalyzerTests extends ESTestCase {
TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3"));
TermQuery termQuery4 = new TermQuery(new Term("_field", "_term4"));
DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
);
Result result = analyze(disjunctionMaxQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
List<QueryAnalyzer.QueryExtraction> terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -486,13 +497,13 @@ public class QueryAnalyzerTests extends ESTestCase {
assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
);
result = analyze(disjunctionMaxQuery);
assertThat(result.verified, is(false));
terms = new ArrayList<>(result.terms);
Collections.sort(terms);
terms = new ArrayList<>(result.extractions);
terms.sort(Comparator.comparing(qt -> qt.term));
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -508,12 +519,12 @@ public class QueryAnalyzerTests extends ESTestCase {
SynonymQuery query = new SynonymQuery();
Result result = analyze(query);
assertThat(result.verified, is(true));
assertThat(result.terms.isEmpty(), is(true));
assertThat(result.extractions.isEmpty(), is(true));
query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2"));
result = analyze(query);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("_field", "_value1"), new Term("_field", "_value2"));
assertTermsEqual(result.extractions, new Term("_field", "_value1"), new Term("_field", "_value2"));
}
public void testFunctionScoreQuery() {
@@ -521,42 +532,211 @@ public class QueryAnalyzerTests extends ESTestCase {
FunctionScoreQuery functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(0, 0, null));
Result result = analyze(functionScoreQuery);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("_field", "_value"));
assertTermsEqual(result.extractions, new Term("_field", "_value"));
functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(0, 0, null), 1f, null, 10f);
result = analyze(functionScoreQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, new Term("_field", "_value"));
assertTermsEqual(result.extractions, new Term("_field", "_value"));
}
public void testSelectTermsListWithHighestSumOfTermLength() {
Set<Term> terms1 = new HashSet<>();
public void testSelectBestExtraction() {
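// Selection rules exercised below: an extraction containing terms is preferred over one containing only
// ranges; between two term-based extractions the ranges are ignored and the extraction with the longest
// shortest term wins; between two range-only extractions the narrower range wins, because smaller ranges
// tend to match fewer documents.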
Set<QueryExtraction> queryTerms1 = terms(new int[0], "12", "1234", "12345");
Set<QueryAnalyzer.QueryExtraction> queryTerms2 = terms(new int[0], "123", "1234", "12345");
Set<QueryExtraction> result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms2, result);
queryTerms1 = terms(new int[]{1, 2, 3});
queryTerms2 = terms(new int[]{2, 3, 4});
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms1, result);
queryTerms1 = terms(new int[]{4, 5, 6});
queryTerms2 = terms(new int[]{1, 2, 3});
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms2, result);
queryTerms1 = terms(new int[]{1, 2, 3}, "123", "456");
queryTerms2 = terms(new int[]{2, 3, 4}, "123", "456");
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms1, result);
queryTerms1 = terms(new int[]{10});
queryTerms2 = terms(new int[]{1});
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms2, result);
queryTerms1 = terms(new int[]{10}, "123");
queryTerms2 = terms(new int[]{1});
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms1, result);
queryTerms1 = terms(new int[]{10}, "1", "123");
queryTerms2 = terms(new int[]{1}, "1", "2");
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame(queryTerms1, result);
queryTerms1 = terms(new int[]{1, 2, 3}, "123", "456");
queryTerms2 = terms(new int[]{2, 3, 4}, "1", "456");
result = selectBestExtraction(queryTerms1, queryTerms2);
assertSame("Ignoring ranges, so then prefer queryTerms1, because it has the longest shortest term", queryTerms1, result);
}
public void testSelectBestExtraction_random() {
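// Neither set contains ranges here, so the extraction whose shortest term is longest must always be selected.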
Set<QueryExtraction> terms1 = new HashSet<>();
int shortestTerms1Length = Integer.MAX_VALUE;
int sumTermLength = randomIntBetween(1, 128);
while (sumTermLength > 0) {
int length = randomInt(sumTermLength);
shortestTerms1Length = Math.min(shortestTerms1Length, length);
terms1.add(new Term("field", randomAlphaOfLength(length)));
terms1.add(new QueryExtraction(new Term("field", randomAlphaOfLength(length))));
sumTermLength -= length;
}
Set<Term> terms2 = new HashSet<>();
Set<QueryExtraction> terms2 = new HashSet<>();
int shortestTerms2Length = Integer.MAX_VALUE;
sumTermLength = randomIntBetween(1, 128);
while (sumTermLength > 0) {
int length = randomInt(sumTermLength);
shortestTerms2Length = Math.min(shortestTerms2Length, length);
terms2.add(new Term("field", randomAlphaOfLength(length)));
terms2.add(new QueryExtraction(new Term("field", randomAlphaOfLength(length))));
sumTermLength -= length;
}
Set<Term> result = selectTermListWithTheLongestShortestTerm(terms1, terms2);
Set<Term> expected = shortestTerms1Length >= shortestTerms2Length ? terms1 : terms2;
Set<QueryAnalyzer.QueryExtraction> result = selectBestExtraction(terms1, terms2);
Set<QueryExtraction> expected = shortestTerms1Length >= shortestTerms2Length ? terms1 : terms2;
assertThat(result, sameInstance(expected));
}
private static void assertTermsEqual(Set<Term> actual, Term... expected) {
assertEquals(new HashSet<>(Arrays.asList(expected)), actual);
public void testPointRangeQuery() {
// int ranges get converted to long ranges:
Query query = IntPoint.newRangeQuery("_field", 10, 20);
Result result = analyze(query);
assertFalse(result.verified);
List<QueryAnalyzer.QueryExtraction> ranges = new ArrayList<>(result.extractions);
assertThat(ranges.size(), equalTo(1));
assertNull(ranges.get(0).term);
assertEquals("_field", ranges.get(0).range.fieldName);
assertDimension(ranges.get(0).range.lowerPoint, bytes -> IntPoint.encodeDimension(10, bytes, 0));
assertDimension(ranges.get(0).range.upperPoint, bytes -> IntPoint.encodeDimension(20, bytes, 0));
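// long ranges are extracted as-is, no normalization needed: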
query = LongPoint.newRangeQuery("_field", 10L, 21L);
result = analyze(query);
assertFalse(result.verified);
ranges = new ArrayList<>(result.extractions);
assertThat(ranges.size(), equalTo(1));
assertNull(ranges.get(0).term);
assertEquals("_field", ranges.get(0).range.fieldName);
assertDimension(ranges.get(0).range.lowerPoint, bytes -> LongPoint.encodeDimension(10L, bytes, 0));
assertDimension(ranges.get(0).range.upperPoint, bytes -> LongPoint.encodeDimension(21L, bytes, 0));
// Half float ranges get converted to double ranges:
query = HalfFloatPoint.newRangeQuery("_field", 10F, 20F);
result = analyze(query);
assertFalse(result.verified);
ranges = new ArrayList<>(result.extractions);
assertThat(ranges.size(), equalTo(1));
assertNull(ranges.get(0).term);
assertEquals("_field", ranges.get(0).range.fieldName);
assertDimension(ranges.get(0).range.lowerPoint, bytes -> HalfFloatPoint.encodeDimension(10F, bytes, 0));
assertDimension(ranges.get(0).range.upperPoint, bytes -> HalfFloatPoint.encodeDimension(20F, bytes, 0));
// Float ranges get converted to double ranges:
query = FloatPoint.newRangeQuery("_field", 10F, 20F);
result = analyze(query);
assertFalse(result.verified);
ranges = new ArrayList<>(result.extractions);
assertThat(ranges.size(), equalTo(1));
assertNull(ranges.get(0).term);
assertEquals("_field", ranges.get(0).range.fieldName);
assertDimension(ranges.get(0).range.lowerPoint, bytes -> FloatPoint.encodeDimension(10F, bytes, 0));
assertDimension(ranges.get(0).range.upperPoint, bytes -> FloatPoint.encodeDimension(20F, bytes, 0));
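// double ranges are also extracted as-is: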
query = DoublePoint.newRangeQuery("_field", 10D, 20D);
result = analyze(query);
assertFalse(result.verified);
ranges = new ArrayList<>(result.extractions);
assertThat(ranges.size(), equalTo(1));
assertNull(ranges.get(0).term);
assertEquals("_field", ranges.get(0).range.fieldName);
assertDimension(ranges.get(0).range.lowerPoint, bytes -> DoublePoint.encodeDimension(10D, bytes, 0));
assertDimension(ranges.get(0).range.upperPoint, bytes -> DoublePoint.encodeDimension(20D, bytes, 0));
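// ip ranges use InetAddressPoint's 16-byte encoding: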
query = InetAddressPoint.newRangeQuery("_field", InetAddresses.forString("192.168.1.0"),
InetAddresses.forString("192.168.1.255"));
result = analyze(query);
assertFalse(result.verified);
ranges = new ArrayList<>(result.extractions);
assertThat(ranges.size(), equalTo(1));
assertNull(ranges.get(0).term);
assertEquals("_field", ranges.get(0).range.fieldName);
assertArrayEquals(ranges.get(0).range.lowerPoint, InetAddressPoint.encode(InetAddresses.forString("192.168.1.0")));
assertArrayEquals(ranges.get(0).range.upperPoint, InetAddressPoint.encode(InetAddresses.forString("192.168.1.255")));
}
public void testPointRangeQuerySelectShortestRange() {
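// Only one clause of a conjunction needs to be extracted; the analyzer compares range widths and keeps the
// narrowest range (_field2 in each pair below), since smaller ranges tend to match fewer documents.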
BooleanQuery.Builder boolQuery = new BooleanQuery.Builder();
boolQuery.add(LongPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
boolQuery.add(LongPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
Result result = analyze(boolQuery.build());
assertFalse(result.verified);
assertEquals(1, result.extractions.size());
assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
boolQuery = new BooleanQuery.Builder();
boolQuery.add(LongPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
boolQuery.add(IntPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
result = analyze(boolQuery.build());
assertFalse(result.verified);
assertEquals(1, result.extractions.size());
assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
boolQuery = new BooleanQuery.Builder();
boolQuery.add(DoublePoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
boolQuery.add(DoublePoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
result = analyze(boolQuery.build());
assertFalse(result.verified);
assertEquals(1, result.extractions.size());
assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
boolQuery = new BooleanQuery.Builder();
boolQuery.add(DoublePoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
boolQuery.add(FloatPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
result = analyze(boolQuery.build());
assertFalse(result.verified);
assertEquals(1, result.extractions.size());
assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
boolQuery = new BooleanQuery.Builder();
boolQuery.add(HalfFloatPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
boolQuery.add(HalfFloatPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
result = analyze(boolQuery.build());
assertFalse(result.verified);
assertEquals(1, result.extractions.size());
assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
}
private static void assertDimension(byte[] expected, Consumer<byte[]> consumer) {
byte[] dest = new byte[expected.length];
consumer.accept(dest);
assertArrayEquals(expected, dest);
}
private static void assertTermsEqual(Set<QueryExtraction> actual, Term... expected) {
assertEquals(Arrays.stream(expected).map(QueryExtraction::new).collect(Collectors.toSet()), actual);
}
private static Set<QueryExtraction> terms(int[] intervals, String... values) {
Set<QueryExtraction> queryExtractions = new HashSet<>();
for (int interval : intervals) {
byte[] encodedInterval = new byte[4];
IntPoint.encodeDimension(interval, encodedInterval, 0);
queryExtractions.add(new QueryAnalyzer.QueryExtraction(new QueryAnalyzer.Range("_field", null, null, encodedInterval)));
}
for (String value : values) {
queryExtractions.add(new QueryExtraction(new Term("_field", value)));
}
return queryExtractions;
}
}