From edad7b47379fd509fabca1376e2492067a5dc213 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 11 Jul 2017 15:34:59 +0200 Subject: [PATCH] Add support for selecting percolator query candidate matches containing range queries. Extracts ranges from range queries on byte, short, integer, long, half_float, scaled_float, float, double, date and ip fields. byte, short, integer and date ranges are normalized to Lucene's LongRange. half_float and float are normalized to Lucene's DoubleRange. When extracting range queries, the QueryAnalyzer computes the width of the range. This width is used to determine what range should be preferred in a conjunction query. The QueryAnalyzer prefers the smaller ranges, because these ranges tend to match with less documents. Closes #21040 --- .../apache/lucene/document/BinaryRange.java | 71 +++++ .../percolator/PercolateQuery.java | 8 +- .../percolator/PercolatorFieldMapper.java | 152 ++++++--- .../PercolatorHighlightSubFetchPhase.java | 2 +- .../percolator/QueryAnalyzer.java | 297 ++++++++++++++---- .../percolator/CandidateQueryTests.java | 182 +++++++++++ .../PercolatorFieldMapperTests.java | 136 +++++++- .../percolator/QueryAnalyzerTests.java | 294 +++++++++++++---- 8 files changed, 970 insertions(+), 172 deletions(-) create mode 100644 core/src/main/java/org/apache/lucene/document/BinaryRange.java diff --git a/core/src/main/java/org/apache/lucene/document/BinaryRange.java b/core/src/main/java/org/apache/lucene/document/BinaryRange.java new file mode 100644 index 00000000000..91be2aa520e --- /dev/null +++ b/core/src/main/java/org/apache/lucene/document/BinaryRange.java @@ -0,0 +1,71 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lucene.document; + +import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; + +/** + * A range field for binary encoded ranges + */ +public final class BinaryRange extends Field { + /** The number of bytes per dimension, use {@link InetAddressPoint#BYTES} as max, because that is maximum we need to support */ + public static final int BYTES = InetAddressPoint.BYTES; + + private static final FieldType TYPE; + static { + TYPE = new FieldType(); + TYPE.setDimensions(2, BYTES); + TYPE.freeze(); + } + + /** + * Create a new BinaryRange from a provided encoded binary range + * @param name field name. must not be null. + * @param encodedRange Encoded range + */ + public BinaryRange(String name, byte[] encodedRange) { + super(name, TYPE); + if (encodedRange.length != BYTES * 2) { + throw new IllegalArgumentException("Unexpected encoded range length [" + encodedRange.length + "]"); + } + fieldsData = new BytesRef(encodedRange); + } + + /** + * Create a query for matching indexed ip ranges that {@code INTERSECT} the defined range. 
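Reviewer's note: a minimal usage sketch of the new field type, using only the constructor and factory method defined in this class. The field name and the zero-filled payload are illustrative; real callers build the 32-byte value via PercolatorFieldMapper.encodeRange(...) further down in this patch.

```java
import org.apache.lucene.document.BinaryRange;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;

// Index side: one encoded range per document, min bytes followed by max bytes.
byte[] encodedRange = new byte[BinaryRange.BYTES * 2];
Document doc = new Document();
doc.add(new BinaryRange("range_field", encodedRange));

// Search side: match documents whose stored range intersects the probe range.
Query probe = BinaryRange.newIntersectsQuery("range_field", encodedRange);
```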
+ * @param field field name. must not be null. + * @param encodedRange Encoded range + * @return query for matching intersecting encoded ranges (overlap, within, crosses, or contains) + * @throws IllegalArgumentException if {@code field} is null, {@code min} or {@code max} is invalid + */ + public static Query newIntersectsQuery(String field, byte[] encodedRange) { + return newRelationQuery(field, encodedRange, RangeFieldQuery.QueryType.INTERSECTS); + } + + static Query newRelationQuery(String field, byte[] encodedRange, RangeFieldQuery.QueryType relation) { + return new RangeFieldQuery(field, encodedRange, 1, relation) { + @Override + protected String toString(byte[] ranges, int dimension) { + return "[" + new BytesRef(ranges, 0, BYTES) + " TO " + new BytesRef(ranges, BYTES, BYTES) + "]"; + } + }; + } + +} diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java index 4283ce3fea4..a7ca013ec22 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java @@ -44,7 +44,7 @@ import java.util.Set; final class PercolateQuery extends Query implements Accountable { // cost of matching the query against the document, arbitrary as it would be really complex to estimate - public static final float MATCH_COST = 1000; + private static final float MATCH_COST = 1000; private final QueryStore queryStore; private final BytesReference documentSource; @@ -164,15 +164,15 @@ final class PercolateQuery extends Query implements Accountable { }; } - public IndexSearcher getPercolatorIndexSearcher() { + IndexSearcher getPercolatorIndexSearcher() { return percolatorIndexSearcher; } - public BytesReference getDocumentSource() { + BytesReference getDocumentSource() { return documentSource; } - public QueryStore getQueryStore() { + QueryStore getQueryStore() { return queryStore; } diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java index d67f870f407..771432400a7 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.percolator; +import org.apache.lucene.document.BinaryRange; import org.apache.lucene.document.Field; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; @@ -25,6 +26,7 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -40,6 +42,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.hash.MurmurHash3; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -54,6 +57,8 @@ import org.elasticsearch.index.mapper.MappedFieldType; 
import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.RangeFieldMapper; +import org.elasticsearch.index.mapper.RangeFieldMapper.RangeType; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; @@ -65,9 +70,11 @@ import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -77,10 +84,10 @@ import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQuery public class PercolatorFieldMapper extends FieldMapper { - public static final XContentType QUERY_BUILDER_CONTENT_TYPE = XContentType.SMILE; - public static final Setting INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING = + static final XContentType QUERY_BUILDER_CONTENT_TYPE = XContentType.SMILE; + static final Setting INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING = Setting.boolSetting("index.percolator.map_unmapped_fields_as_string", false, Setting.Property.IndexScope); - public static final String CONTENT_TYPE = "percolator"; + static final String CONTENT_TYPE = "percolator"; private static final FieldType FIELD_TYPE = new FieldType(); static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point @@ -88,15 +95,16 @@ public class PercolatorFieldMapper extends FieldMapper { static final String EXTRACTION_PARTIAL = "partial"; static final String EXTRACTION_FAILED = "failed"; - public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms"; - public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result"; - public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field"; + static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms"; + static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result"; + static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field"; + static final String RANGE_FIELD_NAME = "range_field"; - public static class Builder extends FieldMapper.Builder { + static class Builder extends FieldMapper.Builder { private final Supplier queryShardContext; - public Builder(String fieldName, Supplier queryShardContext) { + Builder(String fieldName, Supplier queryShardContext) { super(fieldName, FIELD_TYPE, FIELD_TYPE); this.queryShardContext = queryShardContext; } @@ -111,11 +119,15 @@ public class PercolatorFieldMapper extends FieldMapper { fieldType.extractionResultField = extractionResultField.fieldType(); BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context); fieldType.queryBuilderField = queryBuilderField.fieldType(); + // Range field is of type ip, because that matches closest with BinaryRange field. Otherwise we would + // have to introduce a new field type... 
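The "closest match" mentioned in the comment above is about width: an ip bound encodes on InetAddressPoint.BYTES (16) bytes per side, which is exactly the per-bound width BinaryRange was defined with, so every narrower point encoding can be left-padded into the same slot. A sketch of that invariant (the widths are Lucene's fixed per-type encodings):

```java
// Per-bound widths of the point encodings this change normalizes into the
// 16-byte BinaryRange slot (narrower types are left-padded):
//   half_float       -> 2 bytes  (HalfFloatPoint.BYTES)
//   int/float        -> 4 bytes  (IntPoint.BYTES, FloatPoint.BYTES)
//   long/double/date -> 8 bytes  (LongPoint.BYTES, DoublePoint.BYTES)
//   ip               -> 16 bytes (InetAddressPoint.BYTES, no padding needed)
assert BinaryRange.BYTES == InetAddressPoint.BYTES;
```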
+ RangeFieldMapper rangeFieldMapper = createExtractedRangeFieldBuilder(RANGE_FIELD_NAME, RangeType.IP, context); + fieldType.rangeField = rangeFieldMapper.fieldType(); context.path().remove(); setupFieldType(context); return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField, - extractionResultField, queryBuilderField); + extractionResultField, queryBuilderField, rangeFieldMapper); } static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) { @@ -135,9 +147,16 @@ public class PercolatorFieldMapper extends FieldMapper { return builder.build(context); } + static RangeFieldMapper createExtractedRangeFieldBuilder(String name, RangeType rangeType, BuilderContext context) { + RangeFieldMapper.Builder builder = new RangeFieldMapper.Builder(name, rangeType, context.indexCreatedVersion()); + // For now no doc values, because in processQuery(...) only the Lucene range fields get added: + builder.docValues(false); + return builder.build(context); + } + } - public static class TypeParser implements FieldMapper.TypeParser { + static class TypeParser implements FieldMapper.TypeParser { @Override public Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { @@ -145,23 +164,26 @@ public class PercolatorFieldMapper extends FieldMapper { } } - public static class FieldType extends MappedFieldType { + static class FieldType extends MappedFieldType { MappedFieldType queryTermsField; MappedFieldType extractionResultField; MappedFieldType queryBuilderField; - public FieldType() { + RangeFieldMapper.RangeFieldType rangeField; + + FieldType() { setIndexOptions(IndexOptions.NONE); setDocValuesType(DocValuesType.NONE); setStored(false); } - public FieldType(FieldType ref) { + FieldType(FieldType ref) { super(ref); queryTermsField = ref.queryTermsField; extractionResultField = ref.extractionResultField; queryBuilderField = ref.queryBuilderField; + rangeField = ref.rangeField; } @Override @@ -198,33 +220,49 @@ public class PercolatorFieldMapper extends FieldMapper { Query createCandidateQuery(IndexReader indexReader) throws IOException { List extractedTerms = new ArrayList<>(); + Map> encodedPointValuesByField = new HashMap<>(); + LeafReader reader = indexReader.leaves().get(0).reader(); for (FieldInfo info : reader.getFieldInfos()) { Terms terms = reader.terms(info.name); - if (terms == null) { - continue; + if (terms != null) { + BytesRef fieldBr = new BytesRef(info.name); + TermsEnum tenum = terms.iterator(); + for (BytesRef term = tenum.next(); term != null; term = tenum.next()) { + BytesRefBuilder builder = new BytesRefBuilder(); + builder.append(fieldBr); + builder.append(FIELD_VALUE_SEPARATOR); + builder.append(term); + extractedTerms.add(builder.toBytesRef()); + } } - - BytesRef fieldBr = new BytesRef(info.name); - TermsEnum tenum = terms.iterator(); - for (BytesRef term = tenum.next(); term != null; term = tenum.next()) { - BytesRefBuilder builder = new BytesRefBuilder(); - builder.append(fieldBr); - builder.append(FIELD_VALUE_SEPARATOR); - builder.append(term); - extractedTerms.add(builder.toBytesRef()); + if (info.getPointDimensionCount() == 1) { // not != 0 because range fields are not supported + PointValues values = reader.getPointValues(info.name); + List encodedPointValues = new ArrayList<>(); + encodedPointValues.add(values.getMinPackedValue().clone()); + 
encodedPointValues.add(values.getMaxPackedValue().clone()); + encodedPointValuesByField.put(info.name, encodedPointValues); } } - Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + if (extractedTerms.size() != 0) { + builder.add(new TermInSetQuery(queryTermsField.name(), extractedTerms), Occur.SHOULD); + } // include extractionResultField:failed, because docs with this term have no extractedTermsField // and otherwise we would fail to return these docs. Docs that failed query term extraction // always need to be verified by MemoryIndex: - Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED)); + builder.add(new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED)), Occur.SHOULD); - return new BooleanQuery.Builder() - .add(extractionSuccess, Occur.SHOULD) - .add(extractionFailure, Occur.SHOULD) - .build(); + for (Map.Entry> entry : encodedPointValuesByField.entrySet()) { + String rangeFieldName = entry.getKey(); + List encodedPointValues = entry.getValue(); + byte[] min = encodedPointValues.get(0); + byte[] max = encodedPointValues.get(1); + Query query = BinaryRange.newIntersectsQuery(rangeField.name(), encodeRange(rangeFieldName, min, max)); + builder.add(query, Occur.SHOULD); + } + return builder.build(); } } @@ -235,17 +273,20 @@ public class PercolatorFieldMapper extends FieldMapper { private KeywordFieldMapper extractionResultField; private BinaryFieldMapper queryBuilderField; - public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, + private RangeFieldMapper rangeFieldMapper; + + PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, Supplier queryShardContext, KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField, - BinaryFieldMapper queryBuilderField) { + BinaryFieldMapper queryBuilderField, RangeFieldMapper rangeFieldMapper) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); this.queryShardContext = queryShardContext; this.queryTermsField = queryTermsField; this.extractionResultField = extractionResultField; this.queryBuilderField = queryBuilderField; this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings); + this.rangeFieldMapper = rangeFieldMapper; } @Override @@ -254,9 +295,10 @@ public class PercolatorFieldMapper extends FieldMapper { KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType); KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType); BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType); + RangeFieldMapper rangeFieldMapperUpdated = (RangeFieldMapper) rangeFieldMapper.updateFieldType(fullNameToFieldType); if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField - && queryBuilderUpdated == queryBuilderField) { + && queryBuilderUpdated == queryBuilderField && rangeFieldMapperUpdated == rangeFieldMapper) { return this; } if (updated == this) { @@ -265,6 +307,7 @@ public class PercolatorFieldMapper extends FieldMapper { updated.queryTermsField = queryTermsUpdated; updated.extractionResultField = 
extractionResultUpdated; updated.queryBuilderField = queryBuilderUpdated; + updated.rangeFieldMapper = rangeFieldMapperUpdated; return updated; } @@ -310,12 +353,18 @@ public class PercolatorFieldMapper extends FieldMapper { doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType())); return; } - for (Term term : result.terms) { - BytesRefBuilder builder = new BytesRefBuilder(); - builder.append(new BytesRef(term.field())); - builder.append(FIELD_VALUE_SEPARATOR); - builder.append(term.bytes()); - doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType())); + for (QueryAnalyzer.QueryExtraction term : result.extractions) { + if (term.term != null) { + BytesRefBuilder builder = new BytesRefBuilder(); + builder.append(new BytesRef(term.field())); + builder.append(FIELD_VALUE_SEPARATOR); + builder.append(term.bytes()); + doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType())); + } else if (term.range != null) { + byte[] min = term.range.lowerPoint; + byte[] max = term.range.upperPoint; + doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max))); + } } if (result.verified) { doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType())); @@ -324,7 +373,7 @@ public class PercolatorFieldMapper extends FieldMapper { } } - public static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException { + static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException { return toQuery(context, mapUnmappedFieldsAsString, parseQueryBuilder(parser, parser.getTokenLocation())); } @@ -356,7 +405,7 @@ public class PercolatorFieldMapper extends FieldMapper { @Override public Iterator iterator() { - return Arrays.asList(queryTermsField, extractionResultField, queryBuilderField).iterator(); + return Arrays.asList(queryTermsField, extractionResultField, queryBuilderField, rangeFieldMapper).iterator(); } @Override @@ -369,7 +418,6 @@ public class PercolatorFieldMapper extends FieldMapper { return CONTENT_TYPE; } - /** * Fails if a percolator contains an unsupported query. The following queries are not supported: * 1) a has_child query @@ -405,4 +453,24 @@ public class PercolatorFieldMapper extends FieldMapper { } } + static byte[] encodeRange(String rangeFieldName, byte[] minEncoded, byte[] maxEncoded) { + assert minEncoded.length == maxEncoded.length; + byte[] bytes = new byte[BinaryRange.BYTES * 2]; + + // First compute hash for field name and write the full hash into the byte array + BytesRef fieldAsBytesRef = new BytesRef(rangeFieldName); + MurmurHash3.Hash128 hash = new MurmurHash3.Hash128(); + MurmurHash3.hash128(fieldAsBytesRef.bytes, fieldAsBytesRef.offset, fieldAsBytesRef.length, 0, hash); + ByteBuffer bb = ByteBuffer.wrap(bytes); + bb.putLong(hash.h1).putLong(hash.h2).putLong(hash.h1).putLong(hash.h2); + assert bb.position() == bb.limit(); + + // Secondly, overwrite the min and max encoded values in the byte array + // This way we are able to reuse as much as possible from the hash for any range type. 
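Editor's aside: the layout this method produces is easier to review spelled out. Both 16-byte halves start as the repeated field-name hash; the arraycopy calls that follow overwrite the trailing bytes of each half with the encoded bounds, so ranges on different fields occupy disjoint regions of the binary space and cannot intersect by accident. A compilable paraphrase, assuming hash16 stands in for the 16 MurmurHash3 bytes and 8-byte bounds as produced for a long field:

```java
import java.nio.ByteBuffer;

// Paraphrase of the value encodeRange(field, min, max) builds:
static byte[] layoutSketch(byte[] hash16, byte[] min8, byte[] max8) {
    byte[] bytes = new byte[32];                       // BinaryRange.BYTES * 2
    ByteBuffer.wrap(bytes).put(hash16).put(hash16);    // [ hash | hash ]
    int offset = 16 - min8.length;                     // right-align each bound
    System.arraycopy(min8, 0, bytes, offset, min8.length);       // [ hash..min | hash ]
    System.arraycopy(max8, 0, bytes, 16 + offset, max8.length);  // [ hash..min | hash..max ]
    return bytes;
}
```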
+ int offset = BinaryRange.BYTES - minEncoded.length; + System.arraycopy(minEncoded, 0, bytes, offset, minEncoded.length); + System.arraycopy(maxEncoded, 0, bytes, BinaryRange.BYTES + offset, maxEncoded.length); + return bytes; + } + } diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhase.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhase.java index dc0d3db0559..5d26993c034 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhase.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhase.java @@ -50,7 +50,7 @@ import java.util.Map; * Highlighting in the case of the percolate query is a bit different, because the PercolateQuery itself doesn't get highlighted, * but the source of the PercolateQuery gets highlighted by each hit containing a query. */ -public final class PercolatorHighlightSubFetchPhase extends HighlightPhase { +final class PercolatorHighlightSubFetchPhase extends HighlightPhase { PercolatorHighlightSubFetchPhase(Settings settings, Map highlighters) { super(settings, highlighters); diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 00c34e8e5e4..2c7f50497c2 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.percolator; +import org.apache.lucene.document.BinaryRange; import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.Term; import org.apache.lucene.queries.BlendedTermQuery; @@ -30,6 +31,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermInSetQuery; @@ -41,6 +43,7 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; @@ -51,10 +54,13 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.function.Function; -public final class QueryAnalyzer { +import static java.util.stream.Collectors.toSet; + +final class QueryAnalyzer { private static final Map, Function> queryProcessors; @@ -78,6 +84,7 @@ public final class QueryAnalyzer { map.put(DisjunctionMaxQuery.class, disjunctionMaxQuery()); map.put(SynonymQuery.class, synonymQuery()); map.put(FunctionScoreQuery.class, functionScoreQuery()); + map.put(PointRangeQuery.class, pointRangeQuery()); queryProcessors = Collections.unmodifiableMap(map); } @@ -85,7 +92,7 @@ public final class QueryAnalyzer { } /** - * Extracts terms from the provided query. These terms are stored with the percolator query and + * Extracts terms and ranges from the provided query. 
These terms and ranges are stored with the percolator query and * used by the percolate query's candidate query as fields to be queried by. The candidate query * holds the terms from the document to be percolated and allows the percolate query to ignore * percolator queries that we know would otherwise never match. * @@ -104,11 +111,11 @@ * since those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored. *

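To make the selection rules above concrete, here is roughly what analysis yields for a conjunction mixing a term clause and a range clause (a sketch against the package-private API this change introduces, so it assumes same-package access; field names are made up):

```java
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), Occur.MUST);
builder.add(LongPoint.newRangeQuery("number_field", 10L, 20L), Occur.MUST);

QueryAnalyzer.Result result = QueryAnalyzer.analyze(builder.build());
// Only the "best" required clause is kept, and term-based extractions are
// preferred over range-based ones, so a single term extraction survives.
assert result.extractions.size() == 1;
assert result.verified == false; // conjunctions still require MemoryIndex verification
```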
- * Sometimes the query analyzer can't always extract terms from a sub query, if that happens then + * Sometimes the query analyzer can't always extract terms or ranges from a sub query, if that happens then * query analysis is stopped and an UnsupportedQueryException is thrown. So that the caller can mark * this query in such a way that the PercolatorQuery always verifies if this query with the MemoryIndex. */ - public static Result analyze(Query query) { + static Result analyze(Query query) { Class queryClass = query.getClass(); if (queryClass.isAnonymousClass()) { // Sometimes queries have anonymous classes in that case we need the direct super class. @@ -123,65 +130,65 @@ public final class QueryAnalyzer { } } - static Function matchNoDocsQuery() { + private static Function matchNoDocsQuery() { return (query -> new Result(true, Collections.emptySet())); } - static Function constantScoreQuery() { + private static Function constantScoreQuery() { return query -> { Query wrappedQuery = ((ConstantScoreQuery) query).getQuery(); return analyze(wrappedQuery); }; } - static Function boostQuery() { + private static Function boostQuery() { return query -> { Query wrappedQuery = ((BoostQuery) query).getQuery(); return analyze(wrappedQuery); }; } - static Function termQuery() { + private static Function termQuery() { return (query -> { TermQuery termQuery = (TermQuery) query; - return new Result(true, Collections.singleton(termQuery.getTerm())); + return new Result(true, Collections.singleton(new QueryExtraction(termQuery.getTerm()))); }); } - static Function termInSetQuery() { + private static Function termInSetQuery() { return query -> { TermInSetQuery termInSetQuery = (TermInSetQuery) query; - Set terms = new HashSet<>(); + Set terms = new HashSet<>(); PrefixCodedTerms.TermIterator iterator = termInSetQuery.getTermData().iterator(); for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { - terms.add(new Term(iterator.field(), term)); + terms.add(new QueryExtraction(new Term(iterator.field(), term))); } return new Result(true, terms); }; } - static Function synonymQuery() { + private static Function synonymQuery() { return query -> { - Set terms = new HashSet<>(((SynonymQuery) query).getTerms()); + Set terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); return new Result(true, terms); }; } - static Function commonTermsQuery() { + private static Function commonTermsQuery() { return query -> { - List terms = ((CommonTermsQuery) query).getTerms(); - return new Result(false, new HashSet<>(terms)); + Set terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); + return new Result(false, terms); }; } - static Function blendedTermQuery() { + private static Function blendedTermQuery() { return query -> { - List terms = ((BlendedTermQuery) query).getTerms(); - return new Result(true, new HashSet<>(terms)); + Set terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); + return new Result(true, terms); }; } - static Function phraseQuery() { + private static Function phraseQuery() { return query -> { Term[] terms = ((PhraseQuery) query).getTerms(); if (terms.length == 0) { @@ -196,70 +203,71 @@ public final class QueryAnalyzer { longestTerm = term; } } - return new Result(false, Collections.singleton(longestTerm)); + return new Result(false, Collections.singleton(new QueryExtraction(longestTerm))); }; } - static Function multiPhraseQuery() { + private static 
Function multiPhraseQuery() { return query -> { Term[][] terms = ((MultiPhraseQuery) query).getTermArrays(); if (terms.length == 0) { return new Result(true, Collections.emptySet()); } - Set bestTermArr = null; + Set bestTermArr = null; for (Term[] termArr : terms) { - bestTermArr = selectTermListWithTheLongestShortestTerm(bestTermArr, new HashSet<>(Arrays.asList(termArr))); + Set queryExtractions = Arrays.stream(termArr).map(QueryExtraction::new).collect(toSet()); + bestTermArr = selectBestExtraction(bestTermArr, queryExtractions); } return new Result(false, bestTermArr); }; } - static Function spanTermQuery() { + private static Function spanTermQuery() { return query -> { Term term = ((SpanTermQuery) query).getTerm(); - return new Result(true, Collections.singleton(term)); + return new Result(true, Collections.singleton(new QueryExtraction(term))); }; } - static Function spanNearQuery() { + private static Function spanNearQuery() { return query -> { - Set bestClauses = null; + Set bestClauses = null; SpanNearQuery spanNearQuery = (SpanNearQuery) query; for (SpanQuery clause : spanNearQuery.getClauses()) { Result temp = analyze(clause); - bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses); + bestClauses = selectBestExtraction(temp.extractions, bestClauses); } return new Result(false, bestClauses); }; } - static Function spanOrQuery() { + private static Function spanOrQuery() { return query -> { - Set terms = new HashSet<>(); + Set terms = new HashSet<>(); SpanOrQuery spanOrQuery = (SpanOrQuery) query; for (SpanQuery clause : spanOrQuery.getClauses()) { - terms.addAll(analyze(clause).terms); + terms.addAll(analyze(clause).extractions); } return new Result(false, terms); }; } - static Function spanNotQuery() { + private static Function spanNotQuery() { return query -> { Result result = analyze(((SpanNotQuery) query).getInclude()); - return new Result(false, result.terms); + return new Result(false, result.extractions); }; } - static Function spanFirstQuery() { + private static Function spanFirstQuery() { return query -> { Result result = analyze(((SpanFirstQuery) query).getMatch()); - return new Result(false, result.terms); + return new Result(false, result.extractions); }; } - static Function booleanQuery() { + private static Function booleanQuery() { return query -> { BooleanQuery bq = (BooleanQuery) query; List clauses = bq.clauses(); @@ -279,7 +287,7 @@ public final class QueryAnalyzer { } } if (numRequiredClauses > 0) { - Set bestClause = null; + Set bestClause = null; UnsupportedQueryException uqe = null; for (BooleanClause clause : clauses) { if (clause.isRequired() == false) { @@ -296,7 +304,7 @@ public final class QueryAnalyzer { uqe = e; continue; } - bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause); + bestClause = selectBestExtraction(temp.extractions, bestClause); } if (bestClause != null) { return new Result(false, bestClause); @@ -321,14 +329,14 @@ public final class QueryAnalyzer { }; } - static Function disjunctionMaxQuery() { + private static Function disjunctionMaxQuery() { return query -> { List disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts(); return handleDisjunction(disjuncts, 1, false); }; } - static Function functionScoreQuery() { + private static Function functionScoreQuery() { return query -> { FunctionScoreQuery functionScoreQuery = (FunctionScoreQuery) query; Result result = analyze(functionScoreQuery.getSubQuery()); @@ -337,60 +345,178 @@ public final class QueryAnalyzer { // (if it matches with 
the percolator document matches with the extracted terms. // Min score filters out docs, which is different than the functions, which just influences the score.) boolean verified = functionScoreQuery.getMinScore() == null; - return new Result(verified, result.terms); + return new Result(verified, result.extractions); }; } - static Result handleDisjunction(List disjunctions, int minimumShouldMatch, boolean otherClauses) { + private static Function pointRangeQuery() { + return query -> { + PointRangeQuery pointRangeQuery = (PointRangeQuery) query; + byte[] lowerPoint = pointRangeQuery.getLowerPoint(); + byte[] upperPoint = pointRangeQuery.getUpperPoint(); + byte[] interval = new byte[16]; + NumericUtils.subtract(16, 0, prepad(upperPoint), prepad(lowerPoint), interval); + return new Result(false, Collections.singleton(new QueryExtraction( + new Range(pointRangeQuery.getField(), lowerPoint, upperPoint, interval)) + )); + }; + } + + private static byte[] prepad(byte[] original) { + int offset = BinaryRange.BYTES - original.length; + byte[] result = new byte[BinaryRange.BYTES]; + System.arraycopy(original, 0, result, offset, original.length); + return result; + } + + private static Result handleDisjunction(List disjunctions, int minimumShouldMatch, boolean otherClauses) { boolean verified = minimumShouldMatch <= 1 && otherClauses == false; - Set terms = new HashSet<>(); + Set terms = new HashSet<>(); for (Query disjunct : disjunctions) { Result subResult = analyze(disjunct); if (subResult.verified == false) { verified = false; } - terms.addAll(subResult.terms); + terms.addAll(subResult.extractions); } return new Result(verified, terms); } - static Set selectTermListWithTheLongestShortestTerm(Set terms1, Set terms2) { - if (terms1 == null) { - return terms2; - } else if (terms2 == null) { - return terms1; + static Set selectBestExtraction(Set extractions1, Set extractions2) { + assert extractions1 != null || extractions2 != null; + if (extractions1 == null) { + return extractions2; + } else if (extractions2 == null) { + return extractions1; } else { - int terms1ShortestTerm = minTermLength(terms1); - int terms2ShortestTerm = minTermLength(terms2); - // keep the clause with longest terms, this likely to be rarest. - if (terms1ShortestTerm >= terms2ShortestTerm) { - return terms1; + // Prefer term based extractions over range based extractions: + boolean onlyRangeBasedExtractions = true; + for (QueryExtraction clause : extractions1) { + if (clause.term != null) { + onlyRangeBasedExtractions = false; + break; + } + } + for (QueryExtraction clause : extractions2) { + if (clause.term != null) { + onlyRangeBasedExtractions = false; + break; + } + } + + if (onlyRangeBasedExtractions) { + BytesRef terms1SmallestRange = smallestRange(extractions1); + BytesRef terms2SmallestRange = smallestRange(extractions2); + // Keep the clause with smallest range, this is likely to be the rarest. + if (terms1SmallestRange.compareTo(terms2SmallestRange) <= 0) { + return extractions1; + } else { + return extractions2; + } } else { - return terms2; + int terms1ShortestTerm = minTermLength(extractions1); + int terms2ShortestTerm = minTermLength(extractions2); + // keep the clause with longest terms, this likely to be rarest. 
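Both branches of selectBestExtraction deserve a concrete illustration: when both candidate sets are range-only, the narrower interval wins; the moment either set contains a term, the term side wins, because minTermLength(...) below returns Integer.MIN_VALUE for range-only sets precisely so they lose this comparison. A sketch (field names invented, same-package access assumed):

```java
import java.util.Set;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;

// Range-only sets: the narrower [10..15] beats the wider [10..5000].
Set<QueryAnalyzer.QueryExtraction> narrow =
        QueryAnalyzer.analyze(LongPoint.newRangeQuery("f1", 10L, 15L)).extractions;
Set<QueryAnalyzer.QueryExtraction> wide =
        QueryAnalyzer.analyze(LongPoint.newRangeQuery("f2", 10L, 5000L)).extractions;
assert QueryAnalyzer.selectBestExtraction(narrow, wide) == narrow;

// Mixed sets: the term-based set is kept regardless of how wide the range is.
Set<QueryAnalyzer.QueryExtraction> term =
        QueryAnalyzer.analyze(new TermQuery(new Term("f3", "v"))).extractions;
assert QueryAnalyzer.selectBestExtraction(term, narrow) == term;
```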
+ if (terms1ShortestTerm >= terms2ShortestTerm) { + return extractions1; + } else { + return extractions2; + } } } } - static int minTermLength(Set terms) { + private static int minTermLength(Set extractions) { + // In case there are only range extractions, then we return Integer.MIN_VALUE, + // so that selectBestExtraction(...) we are likely to prefer the extractions that contains at least a single extraction + if (extractions.stream().filter(queryExtraction -> queryExtraction.term != null).count() == 0 && + extractions.stream().filter(queryExtraction -> queryExtraction.range != null).count() > 0) { + return Integer.MIN_VALUE; + } + int min = Integer.MAX_VALUE; - for (Term term : terms) { - min = Math.min(min, term.bytes().length); + for (QueryExtraction qt : extractions) { + if (qt.term != null) { + min = Math.min(min, qt.bytes().length); + } + } + return min; + } + + private static BytesRef smallestRange(Set terms) { + BytesRef min = terms.iterator().next().range.interval; + for (QueryExtraction qt : terms) { + if (qt.range != null) { + if (qt.range.interval.compareTo(min) < 0) { + min = qt.range.interval; + } + } } return min; } static class Result { - final Set terms; + final Set extractions; final boolean verified; - Result(boolean verified, Set terms) { - this.terms = terms; + Result(boolean verified, Set extractions) { + this.extractions = extractions; this.verified = verified; } } + static class QueryExtraction { + + final Term term; + final Range range; + + QueryExtraction(Term term) { + this.term = term; + this.range = null; + } + + QueryExtraction(Range range) { + this.term = null; + this.range = range; + } + + String field() { + return term != null ? term.field() : null; + } + + BytesRef bytes() { + return term != null ? term.bytes() : null; + } + + String text() { + return term != null ? term.text() : null; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + QueryExtraction queryExtraction = (QueryExtraction) o; + return Objects.equals(term, queryExtraction.term) && + Objects.equals(range, queryExtraction.range); + } + + @Override + public int hashCode() { + return Objects.hash(term, range); + } + + @Override + public String toString() { + return "QueryExtraction{" + + "term=" + term + + ",range=" + range + + '}'; + } + } + /** * Exception indicating that none or some query terms couldn't extracted from a percolator query. */ @@ -406,9 +532,52 @@ public final class QueryAnalyzer { /** * The actual Lucene query that was unsupported and caused this exception to be thrown. */ - public Query getUnsupportedQuery() { + Query getUnsupportedQuery() { return unsupportedQuery; } } + static class Range { + + final String fieldName; + final byte[] lowerPoint; + final byte[] upperPoint; + final BytesRef interval; + + Range(String fieldName, byte[] lowerPoint, byte[] upperPoint, byte[] interval) { + this.fieldName = fieldName; + this.lowerPoint = lowerPoint; + this.upperPoint = upperPoint; + // using BytesRef here just to make use of its compareTo method. 
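BytesRef.compareTo is an unsigned, lexicographic byte comparison, which is exactly the order the fixed-width intervals from NumericUtils.subtract(...) need: a narrower range sorts before a wider one. For instance (intervals shortened to four bytes for readability; the real ones are 16 bytes):

```java
import org.apache.lucene.util.BytesRef;

BytesRef five = new BytesRef(new byte[] {0, 0, 0, 5});                   // width 5
BytesRef fiveThousand = new BytesRef(new byte[] {0, 0, 19, (byte) 136}); // width 5000
assert five.compareTo(fiveThousand) < 0; // the narrower interval wins in smallestRange(...)
```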
+ this.interval = new BytesRef(interval); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Range range = (Range) o; + return Objects.equals(fieldName, range.fieldName) && + Arrays.equals(lowerPoint, range.lowerPoint) && + Arrays.equals(upperPoint, range.upperPoint); + } + + @Override + public int hashCode() { + int result = 1; + result += 31 * fieldName.hashCode(); + result += Arrays.hashCode(lowerPoint); + result += Arrays.hashCode(upperPoint); + return result; + } + + @Override + public String toString() { + return "Range{" + + ", fieldName='" + fieldName + '\'' + + ", interval=" + interval + + '}'; + } + } + } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java index aaef648cb05..2d78a0db63d 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java @@ -21,7 +21,12 @@ package org.elasticsearch.percolator; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.HalfFloatPoint; +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; @@ -81,6 +86,7 @@ import java.util.List; import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.common.network.InetAddresses.forString; import static org.hamcrest.Matchers.equalTo; public class CandidateQueryTests extends ESSingleNodeTestCase { @@ -287,6 +293,174 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { duelRun(queryStore, memoryIndex, shardSearcher); } + public void testRangeQueries() throws Exception { + List docs = new ArrayList<>(); + addQuery(IntPoint.newRangeQuery("int_field", 0, 5), docs); + addQuery(LongPoint.newRangeQuery("long_field", 5L, 10L), docs); + addQuery(HalfFloatPoint.newRangeQuery("half_float_field", 10, 15), docs); + addQuery(FloatPoint.newRangeQuery("float_field", 15, 20), docs); + addQuery(DoublePoint.newRangeQuery("double_field", 20, 25), docs); + addQuery(InetAddressPoint.newRangeQuery("ip_field", forString("192.168.0.1"), forString("192.168.0.10")), docs); + indexWriter.addDocuments(docs); + indexWriter.close(); + directoryReader = DirectoryReader.open(directory); + IndexSearcher shardSearcher = newSearcher(directoryReader); + shardSearcher.setQueryCache(null); + + MemoryIndex memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new IntPoint("int_field", 3)), new WhitespaceAnalyzer()); + IndexSearcher percolateSearcher = memoryIndex.createSearcher(); + Query query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + TopDocs topDocs = shardSearcher.search(query, 1); + assertEquals(1L, topDocs.totalHits); + assertEquals(1, topDocs.scoreDocs.length); + assertEquals(0, topDocs.scoreDocs[0].doc); + + memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new LongPoint("long_field", 7L)), new WhitespaceAnalyzer()); + percolateSearcher = memoryIndex.createSearcher(); + query = 
fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + topDocs = shardSearcher.search(query, 1); + assertEquals(1L, topDocs.totalHits); + assertEquals(1, topDocs.scoreDocs.length); + assertEquals(1, topDocs.scoreDocs[0].doc); + + memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new HalfFloatPoint("half_float_field", 12)), + new WhitespaceAnalyzer()); + percolateSearcher = memoryIndex.createSearcher(); + query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + topDocs = shardSearcher.search(query, 1); + assertEquals(1L, topDocs.totalHits); + assertEquals(1, topDocs.scoreDocs.length); + assertEquals(2, topDocs.scoreDocs[0].doc); + + memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new FloatPoint("float_field", 17)), new WhitespaceAnalyzer()); + percolateSearcher = memoryIndex.createSearcher(); + query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + topDocs = shardSearcher.search(query, 1); + assertEquals(1, topDocs.totalHits); + assertEquals(1, topDocs.scoreDocs.length); + assertEquals(3, topDocs.scoreDocs[0].doc); + + memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new DoublePoint("double_field", 21)), new WhitespaceAnalyzer()); + percolateSearcher = memoryIndex.createSearcher(); + query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + topDocs = shardSearcher.search(query, 1); + assertEquals(1, topDocs.totalHits); + assertEquals(1, topDocs.scoreDocs.length); + assertEquals(4, topDocs.scoreDocs[0].doc); + + memoryIndex = MemoryIndex.fromDocument(Collections.singleton(new InetAddressPoint("ip_field", + forString("192.168.0.4"))), new WhitespaceAnalyzer()); + percolateSearcher = memoryIndex.createSearcher(); + query = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + topDocs = shardSearcher.search(query, 1); + assertEquals(1, topDocs.totalHits); + assertEquals(1, topDocs.scoreDocs.length); + assertEquals(5, topDocs.scoreDocs[0].doc); + } + + public void testDuelRangeQueries() throws Exception { + List documents = new ArrayList<>(); + + int lowerInt = randomIntBetween(0, 256); + int upperInt = lowerInt + randomIntBetween(0, 32); + addQuery(IntPoint.newRangeQuery("int_field", lowerInt, upperInt), documents); + + long lowerLong = randomIntBetween(0, 256); + long upperLong = lowerLong + randomIntBetween(0, 32); + addQuery(LongPoint.newRangeQuery("long_field", lowerLong, upperLong), documents); + + float lowerHalfFloat = randomIntBetween(0, 256); + float upperHalfFloat = lowerHalfFloat + randomIntBetween(0, 32); + addQuery(HalfFloatPoint.newRangeQuery("half_float_field", lowerHalfFloat, upperHalfFloat), documents); + + float lowerFloat = randomIntBetween(0, 256); + float upperFloat = lowerFloat + randomIntBetween(0, 32); + addQuery(FloatPoint.newRangeQuery("float_field", lowerFloat, upperFloat), documents); + + double lowerDouble = randomDoubleBetween(0, 256, true); + double upperDouble = lowerDouble + randomDoubleBetween(0, 32, true); + addQuery(DoublePoint.newRangeQuery("double_field", lowerDouble, upperDouble), documents); + + int lowerIpPart = randomIntBetween(0, 255); + int upperIpPart = randomIntBetween(lowerIpPart, 255); + addQuery(InetAddressPoint.newRangeQuery("ip_field", forString("192.168.1." + lowerIpPart), + forString("192.168.1." 
+ upperIpPart)), documents); + + indexWriter.addDocuments(documents); + indexWriter.close(); + directoryReader = DirectoryReader.open(directory); + IndexSearcher shardSearcher = newSearcher(directoryReader); + // Disable query cache, because ControlQuery cannot be cached... + shardSearcher.setQueryCache(null); + + int randomInt = randomIntBetween(lowerInt, upperInt); + Iterable doc = Collections.singleton(new IntPoint("int_field", randomInt)); + MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + TopDocs result = executeQuery(queryStore, memoryIndex, shardSearcher); + assertThat(result.scoreDocs.length, equalTo(1)); + assertThat(result.scoreDocs[0].doc, equalTo(0)); + duelRun(queryStore, memoryIndex, shardSearcher); + doc = Collections.singleton(new IntPoint("int_field", randomInt())); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + + long randomLong = randomIntBetween((int) lowerLong, (int) upperLong); + doc = Collections.singleton(new LongPoint("long_field", randomLong)); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + result = executeQuery(queryStore, memoryIndex, shardSearcher); + assertThat(result.scoreDocs.length, equalTo(1)); + assertThat(result.scoreDocs[0].doc, equalTo(1)); + duelRun(queryStore, memoryIndex, shardSearcher); + doc = Collections.singleton(new LongPoint("long_field", randomLong())); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + + float randomHalfFloat = randomIntBetween((int) lowerHalfFloat, (int) upperHalfFloat); + doc = Collections.singleton(new HalfFloatPoint("half_float_field", randomHalfFloat)); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + result = executeQuery(queryStore, memoryIndex, shardSearcher); + assertThat(result.scoreDocs.length, equalTo(1)); + assertThat(result.scoreDocs[0].doc, equalTo(2)); + duelRun(queryStore, memoryIndex, shardSearcher); + doc = Collections.singleton(new HalfFloatPoint("half_float_field", randomFloat())); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + + float randomFloat = randomIntBetween((int) lowerFloat, (int) upperFloat); + doc = Collections.singleton(new FloatPoint("float_field", randomFloat)); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + result = executeQuery(queryStore, memoryIndex, shardSearcher); + assertThat(result.scoreDocs.length, equalTo(1)); + assertThat(result.scoreDocs[0].doc, equalTo(3)); + duelRun(queryStore, memoryIndex, shardSearcher); + doc = Collections.singleton(new FloatPoint("float_field", randomFloat())); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + + double randomDouble = randomDoubleBetween(lowerDouble, upperDouble, true); + doc = Collections.singleton(new DoublePoint("double_field", randomDouble)); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + result = executeQuery(queryStore, memoryIndex, shardSearcher); + assertThat(result.scoreDocs.length, equalTo(1)); + assertThat(result.scoreDocs[0].doc, equalTo(4)); + duelRun(queryStore, memoryIndex, shardSearcher); + doc = Collections.singleton(new DoublePoint("double_field", randomFloat())); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, 
shardSearcher); + + doc = Collections.singleton(new InetAddressPoint("ip_field", + forString("192.168.1." + randomIntBetween(lowerIpPart, upperIpPart)))); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + result = executeQuery(queryStore, memoryIndex, shardSearcher); + assertThat(result.scoreDocs.length, equalTo(1)); + assertThat(result.scoreDocs[0].doc, equalTo(5)); + duelRun(queryStore, memoryIndex, shardSearcher); + doc = Collections.singleton(new InetAddressPoint("ip_field", + forString("192.168.1." + randomIntBetween(0, 255)))); + memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + } + private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException { boolean requireScore = randomBoolean(); IndexSearcher percolateSearcher = memoryIndex.createSearcher(); @@ -319,6 +493,14 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { queries.add(query); } + private TopDocs executeQuery(PercolateQuery.QueryStore queryStore, + MemoryIndex memoryIndex, + IndexSearcher shardSearcher) throws IOException { + IndexSearcher percolateSearcher = memoryIndex.createSearcher(); + Query percolateQuery = fieldType.percolateQuery(queryStore, new BytesArray("{}"), percolateSearcher); + return shardSearcher.search(percolateQuery, 10); + } + private static final class CustomQuery extends Query { private final Term term; diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java index 24e92c4fe3b..13ba4667789 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java @@ -20,6 +20,11 @@ package org.elasticsearch.percolator; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.HalfFloatPoint; +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; @@ -38,6 +43,8 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.hash.MurmurHash3; +import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; @@ -77,6 +84,7 @@ import org.elasticsearch.test.InternalSettingsPlugin; import org.junit.Before; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -128,7 +136,13 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { .startObject("field2").field("type", "text").endObject() .startObject("_field3").field("type", "text").endObject() .startObject("field4").field("type", "text").endObject() - .startObject("number_field").field("type", "long").endObject() + .startObject("number_field1").field("type", 
"integer").endObject() + .startObject("number_field2").field("type", "long").endObject() + .startObject("number_field3").field("type", "long").endObject() + .startObject("number_field4").field("type", "half_float").endObject() + .startObject("number_field5").field("type", "float").endObject() + .startObject("number_field6").field("type", "double").endObject() + .startObject("number_field7").field("type", "ip").endObject() .startObject("date_field").field("type", "date").endObject() .endObject().endObject().endObject().string(); mapperService.merge("doc", new CompressedXContent(mapper), MapperService.MergeReason.MAPPING_UPDATE, false); @@ -206,12 +220,12 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer()); memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer()); memoryIndex.addField("field4", "123", new WhitespaceAnalyzer()); - memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer()); + memoryIndex.addField(new LongPoint("number_field2", 10L), new WhitespaceAnalyzer()); IndexReader indexReader = memoryIndex.createSearcher().getIndexReader(); BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader); - assertEquals(2, candidateQuery.clauses().size()); + assertEquals(3, candidateQuery.clauses().size()); assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur()); TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery(); @@ -236,6 +250,54 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur()); assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)), candidateQuery.clauses().get(1).getQuery()); + + assertEquals(Occur.SHOULD, candidateQuery.clauses().get(2).getOccur()); + assertThat(candidateQuery.clauses().get(2).getQuery().toString(), containsString(fieldName + ".range_field: terms = new ArrayList<>(result.terms); + List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field())); assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes())); @@ -75,8 +86,8 @@ public class QueryAnalyzerTests extends ESTestCase { TermInSetQuery termsQuery = new TermInSetQuery("_field", new BytesRef("_term1"), new BytesRef("_term2")); Result result = analyze(termsQuery); assertThat(result.verified, is(true)); - List terms = new ArrayList<>(result.terms); - Collections.sort(terms); + List terms = new ArrayList<>(result.extractions); + terms.sort(Comparator.comparing(qt -> qt.term)); assertThat(terms.size(), equalTo(2)); assertThat(terms.get(0).field(), equalTo("_field")); assertThat(terms.get(0).text(), equalTo("_term1")); @@ -88,7 +99,7 @@ public class QueryAnalyzerTests extends ESTestCase { PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2"); Result result = analyze(phraseQuery); assertThat(result.verified, is(false)); - List terms = new ArrayList<>(result.terms); + List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field())); assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes())); @@ -96,14 +107,14 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_multiPhraseQuery() { MultiPhraseQuery 
multiPhraseQuery = new MultiPhraseQuery.Builder() - .add(new Term("_field", "_long_term")) - .add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")}) - .add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")}) - .add(new Term[] {new Term("_field", "_very_long_term")}) - .build(); + .add(new Term("_field", "_long_term")) + .add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")}) + .add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")}) + .add(new Term[] {new Term("_field", "_very_long_term")}) + .build(); Result result = analyze(multiPhraseQuery); assertThat(result.verified, is(false)); - List terms = new ArrayList<>(result.terms); + List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); assertThat(terms.get(0).field(), equalTo("_field")); assertThat(terms.get(0).bytes().utf8ToString(), equalTo("_very_long_term")); @@ -126,8 +137,8 @@ public class QueryAnalyzerTests extends ESTestCase { BooleanQuery booleanQuery = builder.build(); Result result = analyze(booleanQuery); assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false)); - List terms = new ArrayList<>(result.terms); - Collections.sort(terms); + List terms = new ArrayList<>(result.extractions); + terms.sort(Comparator.comparing(qt -> qt.term)); assertThat(terms.size(), equalTo(3)); assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field())); assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes())); @@ -154,8 +165,8 @@ public class QueryAnalyzerTests extends ESTestCase { BooleanQuery booleanQuery = builder.build(); Result result = analyze(booleanQuery); assertThat(result.verified, is(true)); - List terms = new ArrayList<>(result.terms); - Collections.sort(terms); + List terms = new ArrayList<>(result.extractions); + terms.sort(Comparator.comparing(qt -> qt.term)); assertThat(terms.size(), equalTo(4)); assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field())); assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes())); @@ -177,7 +188,7 @@ public class QueryAnalyzerTests extends ESTestCase { BooleanQuery booleanQuery = builder.build(); Result result = analyze(booleanQuery); assertThat(result.verified, is(false)); - List terms = new ArrayList<>(result.terms); + List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field())); assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes())); @@ -242,7 +253,7 @@ public class QueryAnalyzerTests extends ESTestCase { ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1); Result result = analyze(constantScoreQuery); assertThat(result.verified, is(true)); - List terms = new ArrayList<>(result.terms); + List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field())); assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes())); @@ -253,7 +264,7 @@ public class QueryAnalyzerTests extends ESTestCase { BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f); Result result = analyze(constantScoreQuery); assertThat(result.verified, is(true)); - List terms = new ArrayList<>(result.terms); + List terms = new ArrayList<>(result.extractions); assertThat(terms.size(), equalTo(1)); 
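A small pattern note on these tests: QueryExtraction is not itself Comparable, so the sorts go through the wrapped Term, which is. That only works while every extraction in the set is term-based; a set that mixed in ranges (where term == null) would need a null-safe comparator, e.g.:

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

List<QueryAnalyzer.QueryExtraction> extractions = new ArrayList<>(result.extractions);
extractions.sort(Comparator.comparing((QueryAnalyzer.QueryExtraction qt) -> qt.term,
        Comparator.nullsLast(Comparator.naturalOrder())));
```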
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -265,8 +276,8 @@
         commonTermsQuery.add(new Term("_field", "_term2"));
         Result result = analyze(commonTermsQuery);
         assertThat(result.verified, is(false));
-        List<Term> terms = new ArrayList<>(result.terms);
-        Collections.sort(terms);
+        List<QueryExtraction> terms = new ArrayList<>(result.extractions);
+        terms.sort(Comparator.comparing(qt -> qt.term));
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field"));
         assertThat(terms.get(0).text(), equalTo("_term1"));
@@ -279,8 +290,8 @@
         BlendedTermQuery commonTermsQuery = BlendedTermQuery.dismaxBlendedQuery(termsArr, 1.0f);
         Result result = analyze(commonTermsQuery);
         assertThat(result.verified, is(true));
-        List<Term> terms = new ArrayList<>(result.terms);
-        Collections.sort(terms);
+        List<QueryExtraction> terms = new ArrayList<>(result.extractions);
+        terms.sort(Comparator.comparing(qt -> qt.term));
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field"));
         assertThat(terms.get(0).text(), equalTo("_term1"));
@@ -303,18 +314,18 @@
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         Result result = analyze(spanTermQuery1);
         assertThat(result.verified, is(true));
-        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
+        assertTermsEqual(result.extractions, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_spanNearQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
-            .addClause(spanTermQuery1).addClause(spanTermQuery2).build();
+                .addClause(spanTermQuery1).addClause(spanTermQuery2).build();
         Result result = analyze(spanNearQuery);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, spanTermQuery2.getTerm());
+        assertTermsEqual(result.extractions, spanTermQuery2.getTerm());
     }
 
     public void testExtractQueryMetadata_spanOrQuery() {
@@ -323,7 +334,7 @@
         SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
         Result result = analyze(spanOrQuery);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
+        assertTermsEqual(result.extractions, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
     }
 
     public void testExtractQueryMetadata_spanFirstQuery() {
@@ -331,7 +342,7 @@
         SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
         Result result = analyze(spanFirstQuery);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
+        assertTermsEqual(result.extractions, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_spanNotQuery() {
@@ -340,35 +351,35 @@
         SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
         Result result = analyze(spanNotQuery);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
+        assertTermsEqual(result.extractions, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_matchNoDocsQuery() {
         Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"));
         assertThat(result.verified, is(true));
-        assertEquals(0, result.terms.size());
+        assertEquals(0, result.extractions.size());
 
         BooleanQuery.Builder bq = new BooleanQuery.Builder();
         bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
         bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
         result = analyze(bq.build());
         assertThat(result.verified, is(false));
-        assertEquals(0, result.terms.size());
+        assertEquals(0, result.extractions.size());
 
         bq = new BooleanQuery.Builder();
         bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
         bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
         result = analyze(bq.build());
         assertThat(result.verified, is(true));
-        assertTermsEqual(result.terms, new Term("field", "value"));
+        assertTermsEqual(result.extractions, new Term("field", "value"));
 
         DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
-            Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
-            1f
+                Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
+                1f
         );
         result = analyze(disjunctionMaxQuery);
         assertThat(result.verified, is(true));
-        assertTermsEqual(result.terms, new Term("field", "value"));
+        assertTermsEqual(result.extractions, new Term("field", "value"));
     }
 
     public void testExtractQueryMetadata_matchAllDocsQuery() {
@@ -379,7 +390,7 @@
         builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
         Result result = analyze(builder.build());
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, new Term("field", "value"));
+        assertTermsEqual(result.extractions, new Term("field", "value"));
 
         builder = new BooleanQuery.Builder();
         builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
@@ -442,7 +453,7 @@
         Result result = analyze(bq1);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, termQuery1.getTerm());
+        assertTermsEqual(result.extractions, termQuery1.getTerm());
 
         TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
         builder = new BooleanQuery.Builder();
@@ -452,7 +463,7 @@
         bq1 = builder.build();
         result = analyze(bq1);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, termQuery2.getTerm());
+        assertTermsEqual(result.extractions, termQuery2.getTerm());
 
         builder = new BooleanQuery.Builder();
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
@@ -468,13 +479,13 @@
         TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3"));
         TermQuery termQuery4 = new TermQuery(new Term("_field", "_term4"));
         DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
-            Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
+                Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
         );
 
         Result result = analyze(disjunctionMaxQuery);
         assertThat(result.verified, is(true));
-        List<Term> terms = new ArrayList<>(result.terms);
-        Collections.sort(terms);
+        List<QueryExtraction> terms = new ArrayList<>(result.extractions);
+        terms.sort(Comparator.comparing(qt -> qt.term));
         assertThat(terms.size(), equalTo(4));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -486,13 +497,13 @@
         assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
 
         disjunctionMaxQuery = new DisjunctionMaxQuery(
-            Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
+                Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
         );
         result = analyze(disjunctionMaxQuery);
         assertThat(result.verified, is(false));
-        terms = new ArrayList<>(result.terms);
-        Collections.sort(terms);
+        terms = new ArrayList<>(result.extractions);
+        terms.sort(Comparator.comparing(qt -> qt.term));
         assertThat(terms.size(), equalTo(4));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -508,12 +519,12 @@
         SynonymQuery query = new SynonymQuery();
         Result result = analyze(query);
         assertThat(result.verified, is(true));
-        assertThat(result.terms.isEmpty(), is(true));
+        assertThat(result.extractions.isEmpty(), is(true));
 
         query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2"));
         result = analyze(query);
         assertThat(result.verified, is(true));
-        assertTermsEqual(result.terms, new Term("_field", "_value1"), new Term("_field", "_value2"));
+        assertTermsEqual(result.extractions, new Term("_field", "_value1"), new Term("_field", "_value2"));
     }
 
     public void testFunctionScoreQuery() {
@@ -521,42 +532,211 @@
         FunctionScoreQuery functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(0, 0, null));
         Result result = analyze(functionScoreQuery);
         assertThat(result.verified, is(true));
-        assertTermsEqual(result.terms, new Term("_field", "_value"));
+        assertTermsEqual(result.extractions, new Term("_field", "_value"));
 
         functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(0, 0, null), 1f, null, 10f);
         result = analyze(functionScoreQuery);
         assertThat(result.verified, is(false));
-        assertTermsEqual(result.terms, new Term("_field", "_value"));
+        assertTermsEqual(result.extractions, new Term("_field", "_value"));
     }
 
-    public void testSelectTermsListWithHighestSumOfTermLength() {
-        Set<Term> terms1 = new HashSet<>();
+    public void testSelectBestExtraction() {
+        Set<QueryExtraction> queryTerms1 = terms(new int[0], "12", "1234", "12345");
+        Set<QueryExtraction> queryTerms2 = terms(new int[0], "123", "1234", "12345");
+        Set<QueryExtraction> result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms2, result);
+
+        queryTerms1 = terms(new int[]{1, 2, 3});
+        queryTerms2 = terms(new int[]{2, 3, 4});
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms1, result);
+
+        queryTerms1 = terms(new int[]{4, 5, 6});
+        queryTerms2 = terms(new int[]{1, 2, 3});
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms2, result);
+
+        queryTerms1 = terms(new int[]{1, 2, 3}, "123", "456");
+        queryTerms2 = terms(new int[]{2, 3, 4}, "123", "456");
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms1, result);
+
+        queryTerms1 = terms(new int[]{10});
+        queryTerms2 = terms(new int[]{1});
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms2, result);
+
+        queryTerms1 = terms(new int[]{10}, "123");
+        queryTerms2 = terms(new int[]{1});
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms1, result);
+
+        queryTerms1 = terms(new int[]{10}, "1", "123");
+        queryTerms2 = terms(new int[]{1}, "1", "2");
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame(queryTerms1, result);
+
+        queryTerms1 = terms(new int[]{1, 2, 3}, "123", "456");
+        queryTerms2 = terms(new int[]{2, 3, 4}, "1", "456");
+        result = selectBestExtraction(queryTerms1, queryTerms2);
+        assertSame("Ignoring ranges, so then prefer queryTerms1, because it has the longest shortest term", queryTerms1, result);
+    }
+
+    public void testSelectBestExtraction_random() {
+        Set<QueryExtraction> terms1 = new HashSet<>();
         int shortestTerms1Length = Integer.MAX_VALUE;
         int sumTermLength = randomIntBetween(1, 128);
         while (sumTermLength > 0) {
             int length = randomInt(sumTermLength);
             shortestTerms1Length = Math.min(shortestTerms1Length, length);
-            terms1.add(new Term("field", randomAlphaOfLength(length)));
+            terms1.add(new QueryExtraction(new Term("field", randomAlphaOfLength(length))));
             sumTermLength -= length;
         }
 
-        Set<Term> terms2 = new HashSet<>();
+        Set<QueryExtraction> terms2 = new HashSet<>();
         int shortestTerms2Length = Integer.MAX_VALUE;
         sumTermLength = randomIntBetween(1, 128);
         while (sumTermLength > 0) {
             int length = randomInt(sumTermLength);
             shortestTerms2Length = Math.min(shortestTerms2Length, length);
-            terms2.add(new Term("field", randomAlphaOfLength(length)));
+            terms2.add(new QueryExtraction(new Term("field", randomAlphaOfLength(length))));
             sumTermLength -= length;
         }
 
-        Set<Term> result = selectTermListWithTheLongestShortestTerm(terms1, terms2);
-        Set<Term> expected = shortestTerms1Length >= shortestTerms2Length ? terms1 : terms2;
+        Set<QueryExtraction> result = selectBestExtraction(terms1, terms2);
+        Set<QueryExtraction> expected = shortestTerms1Length >= shortestTerms2Length ? terms1 : terms2;
         assertThat(result, sameInstance(expected));
     }
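The term-only selection rule the two tests above pin down can be stated independently of the percolator classes: prefer the extraction set whose shortest term is longest, since longer terms are rarer and produce fewer candidate matches. A minimal sketch over plain strings (class and method names are illustrative, not the PR's code):

    import java.util.Set;

    public final class SelectionSketch {
        // Mirrors "shortestTerms1Length >= shortestTerms2Length ? terms1 : terms2"
        // from testSelectBestExtraction_random; ties go to the first set.
        static Set<String> selectByLongestShortestTerm(Set<String> terms1, Set<String> terms2) {
            int shortest1 = terms1.stream().mapToInt(String::length).min().orElse(0);
            int shortest2 = terms2.stream().mapToInt(String::length).min().orElse(0);
            return shortest1 >= shortest2 ? terms1 : terms2;
        }
    }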
-    private static void assertTermsEqual(Set<Term> actual, Term... expected) {
-        assertEquals(new HashSet<>(Arrays.asList(expected)), actual);
+    public void testPointRangeQuery() {
+        // int ranges get converted to long ranges:
+        Query query = IntPoint.newRangeQuery("_field", 10, 20);
+        Result result = analyze(query);
+        assertFalse(result.verified);
+        List<QueryExtraction> ranges = new ArrayList<>(result.extractions);
+        assertThat(ranges.size(), equalTo(1));
+        assertNull(ranges.get(0).term);
+        assertEquals("_field", ranges.get(0).range.fieldName);
+        assertDimension(ranges.get(0).range.lowerPoint, bytes -> IntPoint.encodeDimension(10, bytes, 0));
+        assertDimension(ranges.get(0).range.upperPoint, bytes -> IntPoint.encodeDimension(20, bytes, 0));
+
+        query = LongPoint.newRangeQuery("_field", 10L, 21L);
+        result = analyze(query);
+        assertFalse(result.verified);
+        ranges = new ArrayList<>(result.extractions);
+        assertThat(ranges.size(), equalTo(1));
+        assertNull(ranges.get(0).term);
+        assertEquals("_field", ranges.get(0).range.fieldName);
+        assertDimension(ranges.get(0).range.lowerPoint, bytes -> LongPoint.encodeDimension(10L, bytes, 0));
+        assertDimension(ranges.get(0).range.upperPoint, bytes -> LongPoint.encodeDimension(21L, bytes, 0));
+
+        // Half float ranges get converted to double ranges:
+        query = HalfFloatPoint.newRangeQuery("_field", 10F, 20F);
+        result = analyze(query);
+        assertFalse(result.verified);
+        ranges = new ArrayList<>(result.extractions);
+        assertThat(ranges.size(), equalTo(1));
+        assertNull(ranges.get(0).term);
+        assertEquals("_field", ranges.get(0).range.fieldName);
+        assertDimension(ranges.get(0).range.lowerPoint, bytes -> HalfFloatPoint.encodeDimension(10F, bytes, 0));
+        assertDimension(ranges.get(0).range.upperPoint, bytes -> HalfFloatPoint.encodeDimension(20F, bytes, 0));
+
+        // Float ranges get converted to double ranges:
+        query = FloatPoint.newRangeQuery("_field", 10F, 20F);
+        result = analyze(query);
+        assertFalse(result.verified);
+        ranges = new ArrayList<>(result.extractions);
+        assertThat(ranges.size(), equalTo(1));
+        assertNull(ranges.get(0).term);
+        assertEquals("_field", ranges.get(0).range.fieldName);
+        assertDimension(ranges.get(0).range.lowerPoint, bytes -> FloatPoint.encodeDimension(10F, bytes, 0));
+        assertDimension(ranges.get(0).range.upperPoint, bytes -> FloatPoint.encodeDimension(20F, bytes, 0));
+
+        query = DoublePoint.newRangeQuery("_field", 10D, 20D);
+        result = analyze(query);
+        assertFalse(result.verified);
+        ranges = new ArrayList<>(result.extractions);
+        assertThat(ranges.size(), equalTo(1));
+        assertNull(ranges.get(0).term);
+        assertEquals("_field", ranges.get(0).range.fieldName);
+        assertDimension(ranges.get(0).range.lowerPoint, bytes -> DoublePoint.encodeDimension(10D, bytes, 0));
+        assertDimension(ranges.get(0).range.upperPoint, bytes -> DoublePoint.encodeDimension(20D, bytes, 0));
+
+        query = InetAddressPoint.newRangeQuery("_field", InetAddresses.forString("192.168.1.0"),
+                InetAddresses.forString("192.168.1.255"));
+        result = analyze(query);
+        assertFalse(result.verified);
+        ranges = new ArrayList<>(result.extractions);
+        assertThat(ranges.size(), equalTo(1));
+        assertNull(ranges.get(0).term);
+        assertEquals("_field", ranges.get(0).range.fieldName);
+        assertArrayEquals(ranges.get(0).range.lowerPoint, InetAddressPoint.encode(InetAddresses.forString("192.168.1.0")));
+        assertArrayEquals(ranges.get(0).range.upperPoint, InetAddressPoint.encode(InetAddresses.forString("192.168.1.255")));
     }
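For orientation, the lowerPoint/upperPoint bytes asserted above come straight off the query object: every numeric range query is a PointRangeQuery underneath, so the analyzer can read the field name and the sortable-encoded bounds directly. A small sketch of that step, assuming Lucene's public PointRangeQuery accessors (the class name is illustrative):

    import org.apache.lucene.document.IntPoint;
    import org.apache.lucene.search.PointRangeQuery;
    import org.apache.lucene.search.Query;

    public final class BoundsSketch {
        public static void main(String[] args) {
            Query query = IntPoint.newRangeQuery("_field", 10, 20);
            PointRangeQuery prq = (PointRangeQuery) query;
            byte[] lowerPoint = prq.getLowerPoint(); // 4-byte sortable encoding of 10
            byte[] upperPoint = prq.getUpperPoint(); // 4-byte sortable encoding of 20
            System.out.println(prq.getField() + ": " + IntPoint.decodeDimension(lowerPoint, 0)
                    + " TO " + IntPoint.decodeDimension(upperPoint, 0)); // _field: 10 TO 20
        }
    }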
+
+    public void testPointRangeQuerySelectShortestRange() {
+        BooleanQuery.Builder boolQuery = new BooleanQuery.Builder();
+        boolQuery.add(LongPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
+        boolQuery.add(LongPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
+        Result result = analyze(boolQuery.build());
+        assertFalse(result.verified);
+        assertEquals(1, result.extractions.size());
+        assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
+
+        boolQuery = new BooleanQuery.Builder();
+        boolQuery.add(LongPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
+        boolQuery.add(IntPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
+        result = analyze(boolQuery.build());
+        assertFalse(result.verified);
+        assertEquals(1, result.extractions.size());
+        assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
+
+        boolQuery = new BooleanQuery.Builder();
+        boolQuery.add(DoublePoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
+        boolQuery.add(DoublePoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
+        result = analyze(boolQuery.build());
+        assertFalse(result.verified);
+        assertEquals(1, result.extractions.size());
+        assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
+
+        boolQuery = new BooleanQuery.Builder();
+        boolQuery.add(DoublePoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
+        boolQuery.add(FloatPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
+        result = analyze(boolQuery.build());
+        assertFalse(result.verified);
+        assertEquals(1, result.extractions.size());
+        assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
+
+        boolQuery = new BooleanQuery.Builder();
+        boolQuery.add(HalfFloatPoint.newRangeQuery("_field1", 10, 20), BooleanClause.Occur.FILTER);
+        boolQuery.add(HalfFloatPoint.newRangeQuery("_field2", 10, 15), BooleanClause.Occur.FILTER);
+        result = analyze(boolQuery.build());
+        assertFalse(result.verified);
+        assertEquals(1, result.extractions.size());
+        assertEquals("_field2", new ArrayList<>(result.extractions).get(0).range.fieldName);
+    }
+
+    private static void assertDimension(byte[] expected, Consumer<byte[]> consumer) {
+        byte[] dest = new byte[expected.length];
+        consumer.accept(dest);
+        assertArrayEquals(expected, dest);
+    }
+
+    private static void assertTermsEqual(Set<QueryExtraction> actual, Term... expected) {
+        assertEquals(Arrays.stream(expected).map(QueryExtraction::new).collect(Collectors.toSet()), actual);
+    }
+
+    private static Set<QueryExtraction> terms(int[] intervals, String... values) {
+        Set<QueryExtraction> queryExtractions = new HashSet<>();
+        for (int interval : intervals) {
+            byte[] encodedInterval = new byte[4];
+            IntPoint.encodeDimension(interval, encodedInterval, 0);
+            queryExtractions.add(new QueryAnalyzer.QueryExtraction(new QueryAnalyzer.Range("_field", null, null, encodedInterval)));
+        }
+        for (String value : values) {
+            queryExtractions.add(new QueryExtraction(new Term("_field", value)));
+        }
+        return queryExtractions;
+    }
 }
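The shortest-range preference asserted in testPointRangeQuerySelectShortestRange follows from the width computation described in the commit message: in a conjunction, the narrowest range is kept because it tends to match the fewest documents. A hedged sketch of how such widths can be compared on the sortable encodings (the BigInteger approach and all names here are illustrative assumptions, not the PR's implementation):

    import java.math.BigInteger;
    import org.apache.lucene.document.LongPoint;

    public final class RangeWidthSketch {
        // Sortable point encodings preserve numeric order, so an unsigned
        // byte-wise difference yields the range width even for signed values.
        static BigInteger width(byte[] lower, byte[] upper) {
            return new BigInteger(1, upper).subtract(new BigInteger(1, lower));
        }

        public static void main(String[] args) {
            byte[] l1 = new byte[Long.BYTES], u1 = new byte[Long.BYTES];
            byte[] l2 = new byte[Long.BYTES], u2 = new byte[Long.BYTES];
            LongPoint.encodeDimension(10L, l1, 0);
            LongPoint.encodeDimension(20L, u1, 0);
            LongPoint.encodeDimension(10L, l2, 0);
            LongPoint.encodeDimension(15L, u2, 0);
            // [10,15] is narrower than [10,20], so the clause on _field2 wins:
            System.out.println(width(l1, u1)); // 10
            System.out.println(width(l2, u2)); // 5
        }
    }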