diff --git a/docs/reference/mapping/types/percolator.asciidoc b/docs/reference/mapping/types/percolator.asciidoc index 4c7ff113fa9..ca8c8386e9b 100644 --- a/docs/reference/mapping/types/percolator.asciidoc +++ b/docs/reference/mapping/types/percolator.asciidoc @@ -71,11 +71,14 @@ a percolator query does not exist, it will be handled as a default string field fail. [float] -==== Important Notes +==== Limitations Because the `percolate` query is processing one document at a time, it doesn't support queries and filters that run against child documents such as `has_child` and `has_parent`. +The percolator doesn't accept percolator queries containing `range` queries with ranges that are based on current +time (using `now`). + There are a number of queries that fetch data via a get call during query parsing. For example the `terms` query when using terms lookup, `template` query when using indexed scripts and `geo_shape` when using pre-indexed shapes. When these queries are indexed by the `percolator` field type then the get call is executed once. So each time the `percolator` diff --git a/docs/reference/migration/migrate_5_0/percolator.asciidoc b/docs/reference/migration/migrate_5_0/percolator.asciidoc index ae2057bddfb..f173a0df958 100644 --- a/docs/reference/migration/migrate_5_0/percolator.asciidoc +++ b/docs/reference/migration/migrate_5_0/percolator.asciidoc @@ -48,6 +48,11 @@ the existing document. The percolate stats have been removed. This is because the percolator no longer caches the percolator queries. +==== Percolator queries containing range queries with now ranges + +The percolator no longer accepts percolator queries containing `range` queries with ranges that are based on current +time (using `now`). + ==== Java client The percolator is no longer part of the core elasticsearch dependency. It has moved to the percolator module. 
diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/MultiPercolateAction.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/MultiPercolateAction.java index eefc6c996ba..d6eb5660728 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/MultiPercolateAction.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/MultiPercolateAction.java @@ -21,6 +21,7 @@ package org.elasticsearch.percolator; import org.elasticsearch.action.Action; import org.elasticsearch.client.ElasticsearchClient; +@Deprecated public class MultiPercolateAction extends Action { public static final MultiPercolateAction INSTANCE = new MultiPercolateAction(); diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateAction.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateAction.java index 64776f271ae..cebca9ed825 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateAction.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateAction.java @@ -22,6 +22,7 @@ package org.elasticsearch.percolator; import org.elasticsearch.action.Action; import org.elasticsearch.client.ElasticsearchClient; +@Deprecated public class PercolateAction extends Action { public static final PercolateAction INSTANCE = new PercolateAction(); diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java index 76bc136656c..40218e50a4f 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java @@ -22,13 +22,11 @@ package org.elasticsearch.percolator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import 
org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.Weight; @@ -36,115 +34,39 @@ import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Bits; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; import java.io.IOException; import java.util.Objects; import java.util.Set; -import static org.apache.lucene.search.BooleanClause.Occur.FILTER; - -public final class PercolateQuery extends Query implements Accountable { +final class PercolateQuery extends Query implements Accountable { // cost of matching the query against the document, arbitrary as it would be really complex to estimate public static final float MATCH_COST = 1000; - public static class Builder { - - private final String docType; - private final QueryStore queryStore; - private final BytesReference documentSource; - private final IndexSearcher percolatorIndexSearcher; - - private Query queriesMetaDataQuery; - private Query verifiedQueriesQuery = new MatchNoDocsQuery(""); - private Query percolateTypeQuery; - - /** - * @param docType The type of the document being percolated - * @param queryStore The lookup holding all the percolator queries as Lucene queries. 
- * @param documentSource The source of the document being percolated - * @param percolatorIndexSearcher The index searcher on top of the in-memory index that holds the document being percolated - */ - public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) { - this.docType = Objects.requireNonNull(docType); - this.queryStore = Objects.requireNonNull(queryStore); - this.documentSource = Objects.requireNonNull(documentSource); - this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher); - } - - /** - * Optionally sets a query that reduces the number of queries to percolate based on extracted terms from - * the document to be percolated. - * @param extractedTermsFieldName The name of the field to get the extracted terms from - * @param extractionResultField The field to indicate for a document whether query term extraction was complete, - * partial or failed. If query extraction was complete, the MemoryIndex doesn't - */ - public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException { - // We can only skip the MemoryIndex verification when percolating a single document. - // When the document being percolated contains a nested object field then the MemoryIndex contains multiple - // documents. In this case the term query that indicates whether memory index verification can be skipped - // can incorrectly indicate that non nested queries would match, while their nested variants would not. 
- if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) { - this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE)); - } - this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery( - percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName, - // include extractionResultField:failed, because docs with this term have no extractedTermsField - // and otherwise we would fail to return these docs. Docs that failed query term extraction - // always need to be verified by MemoryIndex: - new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED) - ); - } - - /** - * @param percolateTypeQuery A query that identifies all document containing percolator queries - */ - public void setPercolateTypeQuery(Query percolateTypeQuery) { - this.percolateTypeQuery = Objects.requireNonNull(percolateTypeQuery); - } - - public PercolateQuery build() { - if (percolateTypeQuery != null && queriesMetaDataQuery != null) { - throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata"); - } - // The query that selects which percolator queries will be evaluated by MemoryIndex: - BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder(); - if (percolateTypeQuery != null) { - queriesQuery.add(percolateTypeQuery, FILTER); - } - if (queriesMetaDataQuery != null) { - queriesQuery.add(queriesMetaDataQuery, FILTER); - } - return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher, - verifiedQueriesQuery); - } - - } - private final String documentType; private final QueryStore queryStore; private final BytesReference documentSource; - private final Query percolatorQueriesQuery; - private final Query verifiedQueriesQuery; + private final Query candidateMatchesQuery; + private final Query verifiedMatchesQuery; private final IndexSearcher percolatorIndexSearcher; - private PercolateQuery(String 
documentType, QueryStore queryStore, BytesReference documentSource, - Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) { - this.documentType = documentType; - this.documentSource = documentSource; - this.percolatorQueriesQuery = percolatorQueriesQuery; - this.queryStore = queryStore; - this.percolatorIndexSearcher = percolatorIndexSearcher; - this.verifiedQueriesQuery = verifiedQueriesQuery; + PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource, + Query candidateMatchesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedMatchesQuery) { + this.documentType = Objects.requireNonNull(documentType); + this.documentSource = Objects.requireNonNull(documentSource); + this.candidateMatchesQuery = Objects.requireNonNull(candidateMatchesQuery); + this.queryStore = Objects.requireNonNull(queryStore); + this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher); + this.verifiedMatchesQuery = Objects.requireNonNull(verifiedMatchesQuery); } @Override public Query rewrite(IndexReader reader) throws IOException { - Query rewritten = percolatorQueriesQuery.rewrite(reader); - if (rewritten != percolatorQueriesQuery) { + Query rewritten = candidateMatchesQuery.rewrite(reader); + if (rewritten != candidateMatchesQuery) { return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher, - verifiedQueriesQuery); + verifiedMatchesQuery); } else { return this; } @@ -152,8 +74,8 @@ public final class PercolateQuery extends Query implements Accountable { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false); - final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores); + final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, false); + final Weight 
candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, false); return new Weight(this) { @Override public void extractTerms(Set set) { @@ -183,17 +105,17 @@ public final class PercolateQuery extends Query implements Accountable { @Override public float getValueForNormalization() throws IOException { - return innerWeight.getValueForNormalization(); + return candidateMatchesWeight.getValueForNormalization(); } @Override public void normalize(float v, float v1) { - innerWeight.normalize(v, v1); + candidateMatchesWeight.normalize(v, v1); } @Override public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException { - final Scorer approximation = innerWeight.scorer(leafReaderContext); + final Scorer approximation = candidateMatchesWeight.scorer(leafReaderContext); if (approximation == null) { return null; } @@ -226,7 +148,7 @@ public final class PercolateQuery extends Query implements Accountable { } }; } else { - Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext); + Scorer verifiedDocsScorer = verifiedMatchesWeight.scorer(leafReaderContext); Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer); return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) { @@ -293,7 +215,7 @@ public final class PercolateQuery extends Query implements Accountable { @Override public String toString(String s) { return "PercolateQuery{document_type={" + documentType + "},document_source={" + documentSource.utf8ToString() + - "},inner={" + percolatorQueriesQuery.toString(s) + "}}"; + "},inner={" + candidateMatchesQuery.toString(s) + "}}"; } @Override diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQueryBuilder.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQueryBuilder.java index a5db24a71ef..132970d585d 100644 --- 
a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQueryBuilder.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQueryBuilder.java @@ -50,6 +50,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.XContent; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -57,7 +58,6 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.FieldNameAnalyzer; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapperForType; @@ -406,37 +406,27 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder { LeafReader leafReader = ctx.reader(); - BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues(fieldType.getQueryBuilderFieldName()); + BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues(fieldType.queryBuilderField.name()); if (binaryDocValues == null) { return docId -> null; } - Bits bits = leafReader.getDocsWithField(fieldType.getQueryBuilderFieldName()); + Bits bits = leafReader.getDocsWithField(fieldType.queryBuilderField.name()); return docId -> { if (bits.get(docId)) { BytesRef qbSource = binaryDocValues.get(docId); diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java index 1428b8116a8..4cd3bdabbd8 100644 --- 
a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java @@ -20,10 +20,22 @@ package org.elasticsearch.percolator; import org.apache.lucene.document.Field; import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -38,12 +50,18 @@ import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.core.BinaryFieldMapper; import org.elasticsearch.index.mapper.core.KeywordFieldMapper; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.BoostingQueryBuilder; +import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.index.query.RangeQueryBuilder; +import 
org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; @@ -56,7 +74,12 @@ public class PercolatorFieldMapper extends FieldMapper { public static final Setting INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING = Setting.boolSetting("index.percolator.map_unmapped_fields_as_string", false, Setting.Property.IndexScope); public static final String CONTENT_TYPE = "percolator"; - private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType(); + private static final FieldType FIELD_TYPE = new FieldType(); + + static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point + static final String EXTRACTION_COMPLETE = "complete"; + static final String EXTRACTION_PARTIAL = "partial"; + static final String EXTRACTION_FAILED = "failed"; public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms"; public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result"; @@ -74,12 +97,13 @@ public class PercolatorFieldMapper extends FieldMapper { @Override public PercolatorFieldMapper build(BuilderContext context) { context.path().add(name()); + FieldType fieldType = (FieldType) this.fieldType; KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context); - ((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType(); + fieldType.queryTermsField = extractedTermsField.fieldType(); KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context); - ((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType(); + fieldType.extractionResultField = extractionResultField.fieldType(); BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context); - ((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType(); + 
fieldType.queryBuilderField = queryBuilderField.fieldType(); context.path().remove(); setupFieldType(context); return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(), @@ -114,40 +138,28 @@ public class PercolatorFieldMapper extends FieldMapper { } } - public static class PercolatorFieldType extends MappedFieldType { + public static class FieldType extends MappedFieldType { - private MappedFieldType queryTermsField; - private MappedFieldType extractionResultField; - private MappedFieldType queryBuilderField; + MappedFieldType queryTermsField; + MappedFieldType extractionResultField; + MappedFieldType queryBuilderField; - public PercolatorFieldType() { + public FieldType() { setIndexOptions(IndexOptions.NONE); setDocValuesType(DocValuesType.NONE); setStored(false); } - public PercolatorFieldType(PercolatorFieldType ref) { + public FieldType(FieldType ref) { super(ref); queryTermsField = ref.queryTermsField; extractionResultField = ref.extractionResultField; queryBuilderField = ref.queryBuilderField; } - public String getExtractedTermsField() { - return queryTermsField.name(); - } - - public String getExtractionResultFieldName() { - return extractionResultField.name(); - } - - public String getQueryBuilderFieldName() { - return queryBuilderField.name(); - } - @Override public MappedFieldType clone() { - return new PercolatorFieldType(this); + return new FieldType(this); } @Override @@ -159,6 +171,52 @@ public class PercolatorFieldMapper extends FieldMapper { public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "Percolator fields are not searchable directly, use a percolate query instead"); } + + public Query percolateQuery(String documentType, PercolateQuery.QueryStore queryStore, BytesReference documentSource, + IndexSearcher searcher) throws IOException { + IndexReader indexReader = searcher.getIndexReader(); + Query candidateMatchesQuery = createCandidateQuery(indexReader); + 
Query verifiedMatchesQuery; + // We can only skip the MemoryIndex verification when percolating a single document. + // When the document being percolated contains a nested object field then the MemoryIndex contains multiple + // documents. In this case the term query that indicates whether memory index verification can be skipped + // can incorrectly indicate that non nested queries would match, while their nested variants would not. + if (indexReader.maxDoc() == 1) { + verifiedMatchesQuery = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_COMPLETE)); + } else { + verifiedMatchesQuery = new MatchNoDocsQuery("nested docs, so no verified matches"); + } + return new PercolateQuery(documentType, queryStore, documentSource, candidateMatchesQuery, searcher, verifiedMatchesQuery); + } + + Query createCandidateQuery(IndexReader indexReader) throws IOException { + List extractedTerms = new ArrayList<>(); + // include extractionResultField:failed, because docs with this term have no extractedTermsField + // and otherwise we would fail to return these docs. 
Docs that failed query term extraction + // always need to be verified by MemoryIndex: + extractedTerms.add(new Term(extractionResultField.name(), EXTRACTION_FAILED)); + + LeafReader reader = indexReader.leaves().get(0).reader(); + Fields fields = reader.fields(); + for (String field : fields) { + Terms terms = fields.terms(field); + if (terms == null) { + continue; + } + + BytesRef fieldBr = new BytesRef(field); + TermsEnum tenum = terms.iterator(); + for (BytesRef term = tenum.next(); term != null; term = tenum.next()) { + BytesRefBuilder builder = new BytesRefBuilder(); + builder.append(fieldBr); + builder.append(FIELD_VALUE_SEPARATOR); + builder.append(term); + extractedTerms.add(new Term(queryTermsField.name(), builder.toBytesRef())); + } + } + return new TermsQuery(extractedTerms); + } + } private final boolean mapUnmappedFieldAsString; @@ -211,6 +269,7 @@ public class PercolatorFieldMapper extends FieldMapper { XContentParser parser = context.parser(); QueryBuilder queryBuilder = parseQueryBuilder(queryShardContext.newParseContext(parser), parser.getTokenLocation()); + verifyRangeQueries(queryBuilder); // Fetching of terms, shapes and indexed scripts happen during this rewrite: queryBuilder = queryBuilder.rewrite(queryShardContext); @@ -222,11 +281,34 @@ public class PercolatorFieldMapper extends FieldMapper { } Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder); - ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(), - queryTermsField.fieldType()); + processQuery(query, context); return null; } + void processQuery(Query query, ParseContext context) { + ParseContext.Document doc = context.doc(); + FieldType pft = (FieldType) this.fieldType(); + QueryAnalyzer.Result result; + try { + result = QueryAnalyzer.analyze(query); + } catch (QueryAnalyzer.UnsupportedQueryException e) { + doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, 
extractionResultField.fieldType())); + return; + } + for (Term term : result.terms) { + BytesRefBuilder builder = new BytesRefBuilder(); + builder.append(new BytesRef(term.field())); + builder.append(FIELD_VALUE_SEPARATOR); + builder.append(term.bytes()); + doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType())); + } + if (result.verified) { + doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType())); + } else { + doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType())); + } + } + public static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException { return toQuery(context, mapUnmappedFieldsAsString, parseQueryBuilder(context.newParseContext(parser), parser.getTokenLocation())); } @@ -273,4 +355,38 @@ public class PercolatorFieldMapper extends FieldMapper { return CONTENT_TYPE; } + /** + * Fails if a range query with a date range is found based on current time + */ + static void verifyRangeQueries(QueryBuilder queryBuilder) { + if (queryBuilder instanceof RangeQueryBuilder) { + RangeQueryBuilder rangeQueryBuilder = (RangeQueryBuilder) queryBuilder; + if (rangeQueryBuilder.from() instanceof String) { + String from = (String) rangeQueryBuilder.from(); + String to = (String) rangeQueryBuilder.to(); + if (from.contains("now") || to.contains("now")) { + throw new IllegalArgumentException("Percolator queries containing time range queries based on the " + + "current time are forbidden"); + } + } + } else if (queryBuilder instanceof BoolQueryBuilder) { + BoolQueryBuilder boolQueryBuilder = (BoolQueryBuilder) queryBuilder; + List clauses = new ArrayList<>(); + clauses.addAll(boolQueryBuilder.filter()); + clauses.addAll(boolQueryBuilder.must()); + clauses.addAll(boolQueryBuilder.mustNot()); + clauses.addAll(boolQueryBuilder.should()); + for (QueryBuilder clause : clauses) { + 
verifyRangeQueries(clause); + } + } else if (queryBuilder instanceof ConstantScoreQueryBuilder) { + verifyRangeQueries(((ConstantScoreQueryBuilder) queryBuilder).innerQuery()); + } else if (queryBuilder instanceof FunctionScoreQueryBuilder) { + verifyRangeQueries(((FunctionScoreQueryBuilder) queryBuilder).query()); + } else if (queryBuilder instanceof BoostingQueryBuilder) { + verifyRangeQueries(((BoostingQueryBuilder) queryBuilder).negativeQuery()); + verifyRangeQueries(((BoostingQueryBuilder) queryBuilder).positiveQuery()); + } + } + } diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/ExtractQueryTermsService.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java similarity index 73% rename from modules/percolator/src/main/java/org/elasticsearch/percolator/ExtractQueryTermsService.java rename to modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 44048935304..8154c632907 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/ExtractQueryTermsService.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -18,15 +18,8 @@ */ package org.elasticsearch.percolator; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.queries.TermsQuery; @@ -46,37 +39,25 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; -import 
org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; -import org.elasticsearch.index.mapper.ParseContext; -import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.function.Function; -/** - * Utility to extract query terms from queries and create queries from documents. - */ -public final class ExtractQueryTermsService { +public final class QueryAnalyzer { - private static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point - public static final String EXTRACTION_COMPLETE = "complete"; - public static final String EXTRACTION_PARTIAL = "partial"; - public static final String EXTRACTION_FAILED = "failed"; - - static final Map, Function> queryProcessors; + private static final Map, Function> queryProcessors; static { - Map, Function> map = new HashMap<>(17); + Map, Function> map = new HashMap<>(); map.put(MatchNoDocsQuery.class, matchNoDocsQuery()); map.put(ConstantScoreQuery.class, constantScoreQuery()); map.put(BoostQuery.class, boostQuery()); @@ -97,83 +78,34 @@ public final class ExtractQueryTermsService { queryProcessors = Collections.unmodifiableMap(map); } - private ExtractQueryTermsService() { + private QueryAnalyzer() { } /** - * Extracts all terms from the specified query and adds it to the specified document. + * Extracts terms from the provided query. These terms are stored with the percolator query and + * used by the percolate query's candidate query as fields to be queried by. The candidate query + * holds the terms from the document to be percolated and allows the percolate query to ignore + * percolator queries that we know would otherwise never match. 
* - * @param query The query to extract terms from - * @param document The document to add the extracted terms to - * @param queryTermsFieldField The field in the document holding the extracted terms - * @param extractionResultField The field contains whether query term extraction was successful, partial or - * failed. (For example the query contained an unsupported query (e.g. WildcardQuery) - * then query extraction would fail) - * @param fieldType The field type for the query metadata field - */ - public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField, - String extractionResultField, FieldType fieldType) { - Result result; - try { - result = extractQueryTerms(query); - } catch (UnsupportedQueryException e) { - document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType)); - return; - } - for (Term term : result.terms) { - BytesRefBuilder builder = new BytesRefBuilder(); - builder.append(new BytesRef(term.field())); - builder.append(FIELD_VALUE_SEPARATOR); - builder.append(term.bytes()); - document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType)); - } - if (result.verified) { - document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType)); - } else { - document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType)); - } - } - - /** - * Creates a terms query containing all terms from all fields of the specified index reader. - */ - public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField, - Term... 
optionalTerms) throws IOException { - Objects.requireNonNull(queryMetadataField); - - List extractedTerms = new ArrayList<>(); - Collections.addAll(extractedTerms, optionalTerms); - - Fields fields = MultiFields.getFields(indexReader); - for (String field : fields) { - Terms terms = fields.terms(field); - if (terms == null) { - continue; - } - - BytesRef fieldBr = new BytesRef(field); - TermsEnum tenum = terms.iterator(); - for (BytesRef term = tenum.next(); term != null; term = tenum.next()) { - BytesRefBuilder builder = new BytesRefBuilder(); - builder.append(fieldBr); - builder.append(FIELD_VALUE_SEPARATOR); - builder.append(term); - extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef())); - } - } - return new TermsQuery(extractedTerms); - } - - /** - * Extracts all query terms from the provided query and adds it to specified list. *

- * From boolean query with no should clauses or phrase queries only the longest term are selected, + * When extracting the terms for the specified query, we can also determine if the percolator query is + * always going to match. For example if a percolator query just contains a term query or a disjunction + * query then when the candidate query matches with that, we know the entire percolator query always + * matches. This allows the percolate query to skip the expensive memory index verification step that + * it would otherwise have to execute (for example when a percolator query contains a phrase query or a + * conjunction query). + * + *

+ * The query analyzer doesn't always extract all terms from the specified query. For example from a + * boolean query with no should clauses or phrase queries only the longest terms are selected, + * since those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored. + * *

- * If from part of the query, no query terms can be extracted then term extraction is stopped and - * an UnsupportedQueryException is thrown. + * Sometimes the query analyzer can't always extract terms from a sub query, if that happens then + * query analysis is stopped and an UnsupportedQueryException is thrown. So that the caller can mark + * this query in such a way that the PercolatorQuery always verifies if this query with the MemoryIndex. */ - static Result extractQueryTerms(Query query) { + public static Result analyze(Query query) { Class queryClass = query.getClass(); if (queryClass.isAnonymousClass()) { // Sometimes queries have anonymous classes in that case we need the direct super class. @@ -195,14 +127,14 @@ public final class ExtractQueryTermsService { static Function constantScoreQuery() { return query -> { Query wrappedQuery = ((ConstantScoreQuery) query).getQuery(); - return extractQueryTerms(wrappedQuery); + return analyze(wrappedQuery); }; } static Function boostQuery() { return query -> { Query wrappedQuery = ((BoostQuery) query).getQuery(); - return extractQueryTerms(wrappedQuery); + return analyze(wrappedQuery); }; } @@ -277,7 +209,7 @@ public final class ExtractQueryTermsService { Set bestClauses = null; SpanNearQuery spanNearQuery = (SpanNearQuery) query; for (SpanQuery clause : spanNearQuery.getClauses()) { - Result temp = extractQueryTerms(clause); + Result temp = analyze(clause); bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses); } return new Result(false, bestClauses); @@ -289,7 +221,7 @@ public final class ExtractQueryTermsService { Set terms = new HashSet<>(); SpanOrQuery spanOrQuery = (SpanOrQuery) query; for (SpanQuery clause : spanOrQuery.getClauses()) { - terms.addAll(extractQueryTerms(clause).terms); + terms.addAll(analyze(clause).terms); } return new Result(false, terms); }; @@ -297,14 +229,14 @@ public final class ExtractQueryTermsService { static Function spanNotQuery() { return query -> { - 
Result result = extractQueryTerms(((SpanNotQuery) query).getInclude()); + Result result = analyze(((SpanNotQuery) query).getInclude()); return new Result(false, result.terms); }; } static Function spanFirstQuery() { return query -> { - Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch()); + Result result = analyze(((SpanFirstQuery) query).getMatch()); return new Result(false, result.terms); }; } @@ -341,7 +273,7 @@ public final class ExtractQueryTermsService { Result temp; try { - temp = extractQueryTerms(clause.getQuery()); + temp = analyze(clause.getQuery()); } catch (UnsupportedQueryException e) { uqe = e; continue; @@ -381,7 +313,7 @@ public final class ExtractQueryTermsService { static Function functionScoreQuery() { return query -> { FunctionScoreQuery functionScoreQuery = (FunctionScoreQuery) query; - Result result = extractQueryTerms(functionScoreQuery.getSubQuery()); + Result result = analyze(functionScoreQuery.getSubQuery()); // If min_score is specified we can't guarantee upfront that this percolator query matches, // so in that case we set verified to false. // (if it matches with the percolator document matches with the extracted terms. 
@@ -395,7 +327,7 @@ public final class ExtractQueryTermsService { boolean verified = minimumShouldMatch <= 1 && otherClauses == false; Set terms = new HashSet<>(); for (Query disjunct : disjunctions) { - Result subResult = extractQueryTerms(disjunct); + Result subResult = analyze(disjunct); if (subResult.verified == false) { verified = false; } diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/RestMultiPercolateAction.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/RestMultiPercolateAction.java index 5e3a6f90756..3045fa08a09 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/RestMultiPercolateAction.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/RestMultiPercolateAction.java @@ -33,6 +33,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener; import static org.elasticsearch.rest.RestRequest.Method.GET; import static org.elasticsearch.rest.RestRequest.Method.POST; +@Deprecated public class RestMultiPercolateAction extends BaseRestHandler { private final boolean allowExplicitIndex; diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/RestPercolateAction.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/RestPercolateAction.java index bdbe4921f09..1f8c99b2e97 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/RestPercolateAction.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/RestPercolateAction.java @@ -35,6 +35,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener; import static org.elasticsearch.rest.RestRequest.Method.GET; import static org.elasticsearch.rest.RestRequest.Method.POST; +@Deprecated public class RestPercolateAction extends BaseRestHandler { @Inject public RestPercolateAction(Settings settings, RestController controller) { diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportMultiPercolateAction.java 
b/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportMultiPercolateAction.java index 9968035ec85..0bd8b15bfb7 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportMultiPercolateAction.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportMultiPercolateAction.java @@ -49,6 +49,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +@Deprecated public class TransportMultiPercolateAction extends HandledTransportAction { private final Client client; diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportPercolateAction.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportPercolateAction.java index beca222b755..dbbfd09619b 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportPercolateAction.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/TransportPercolateAction.java @@ -57,6 +57,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +@Deprecated public class TransportPercolateAction extends HandledTransportAction { private final Client client; diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java new file mode 100644 index 00000000000..8ac9890afa8 --- /dev/null +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java @@ -0,0 +1,436 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.percolator; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.queries.BlendedTermQuery; +import org.apache.lucene.queries.CommonTermsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.FilterScorer; +import org.apache.lucene.search.FilteredDocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import 
org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanNotQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +import static org.hamcrest.Matchers.equalTo; + +public class CandidateQueryTests extends ESSingleNodeTestCase { + + private Directory directory; + private IndexWriter indexWriter; + private DocumentMapper documentMapper; + private DirectoryReader directoryReader; + private MapperService mapperService; + + private PercolatorFieldMapper fieldMapper; + private PercolatorFieldMapper.FieldType fieldType; + + private List queries; + private PercolateQuery.QueryStore queryStore; + + @Override + protected Collection> getPlugins() { + return Collections.singleton(PercolatorPlugin.class); + } + + @Before + public void init() throws Exception { + directory = newDirectory(); + IndexWriterConfig config = new IndexWriterConfig(new 
WhitespaceAnalyzer()); + config.setMergePolicy(NoMergePolicy.INSTANCE); + indexWriter = new IndexWriter(directory, config); + + String indexName = "test"; + IndexService indexService = createIndex(indexName, Settings.EMPTY); + mapperService = indexService.mapperService(); + + String mapper = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("int_field").field("type", "integer").endObject() + .startObject("long_field").field("type", "long").endObject() + .startObject("half_float_field").field("type", "half_float").endObject() + .startObject("float_field").field("type", "float").endObject() + .startObject("double_field").field("type", "double").endObject() + .startObject("ip_field").field("type", "ip").endObject() + .startObject("field").field("type", "keyword").endObject() + .endObject().endObject().endObject().string(); + documentMapper = mapperService.merge("type", new CompressedXContent(mapper), MapperService.MergeReason.MAPPING_UPDATE, true); + + String queryField = "query_field"; + String mappingType = "query"; + String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(mappingType) + .startObject("properties").startObject(queryField).field("type", "percolator").endObject().endObject() + .endObject().endObject().string(); + mapperService.merge(mappingType, new CompressedXContent(percolatorMapper), MapperService.MergeReason.MAPPING_UPDATE, true); + fieldMapper = (PercolatorFieldMapper) mapperService.documentMapper(mappingType).mappers().getMapper(queryField); + fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + + queries = new ArrayList<>(); + queryStore = ctx -> docId -> this.queries.get(docId); + } + + @After + public void deinit() throws Exception { + directoryReader.close(); + directory.close(); + } + + public void testDuel() throws Exception { + List> queryFunctions = new ArrayList<>(); + queryFunctions.add((id) -> new PrefixQuery(new Term("field", id))); 
+ queryFunctions.add((id) -> new WildcardQuery(new Term("field", id + "*"))); + queryFunctions.add((id) -> new CustomQuery(new Term("field", id))); + queryFunctions.add((id) -> new SpanTermQuery(new Term("field", id))); + queryFunctions.add((id) -> new TermQuery(new Term("field", id))); + queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + return builder.build(); + }); + queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST); + if (randomBoolean()) { + builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); + } + if (randomBoolean()) { + builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST); + } + return builder.build(); + }); + queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); + if (randomBoolean()) { + builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); + } + if (randomBoolean()) { + builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); + } + return builder.build(); + }); + queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + if (randomBoolean()) { + builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); + } + return builder.build(); + }); + queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + if (randomBoolean()) { + builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); + } + return builder.build(); + }); + 
queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4)); + builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); + builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); + return builder.build(); + }); + queryFunctions.add((id) -> new MatchAllDocsQuery()); + queryFunctions.add((id) -> new MatchNoDocsQuery("no reason at all")); + + int numDocs = randomIntBetween(queryFunctions.size(), queryFunctions.size() * 3); + List documents = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + String id = Integer.toString(i); + Query query = queryFunctions.get(i % queryFunctions.size()).apply(id); + addQuery(query, documents); + } + + indexWriter.addDocuments(documents); + indexWriter.close(); + directoryReader = DirectoryReader.open(directory); + IndexSearcher shardSearcher = newSearcher(directoryReader); + // Disable query cache, because ControlQuery cannot be cached... 
+ shardSearcher.setQueryCache(null); + + for (int i = 0; i < numDocs; i++) { + String id = Integer.toString(i); + Iterable doc = Collections.singleton(new StringField("field", id, Field.Store.NO)); + MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + } + + Iterable doc = Collections.singleton(new StringField("field", "value", Field.Store.NO)); + MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + // Empty percolator doc: + memoryIndex = new MemoryIndex(); + duelRun(queryStore, memoryIndex, shardSearcher); + } + + public void testDuelSpecificQueries() throws Exception { + List documents = new ArrayList<>(); + + CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128); + commonTermsQuery.add(new Term("field", "quick")); + commonTermsQuery.add(new Term("field", "brown")); + commonTermsQuery.add(new Term("field", "fox")); + addQuery(commonTermsQuery, documents); + + BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery(new Term[]{new Term("field", "quick"), + new Term("field", "brown"), new Term("field", "fox")}, false); + addQuery(blendedTermQuery, documents); + + SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "quick"))) + .addClause(new SpanTermQuery(new Term("field", "brown"))) + .addClause(new SpanTermQuery(new Term("field", "fox"))) + .build(); + addQuery(spanNearQuery, documents); + + SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "the"))) + .addClause(new SpanTermQuery(new Term("field", "lazy"))) + .addClause(new SpanTermQuery(new Term("field", "doc"))) + .build(); + SpanOrQuery spanOrQuery = new SpanOrQuery( + spanNearQuery, + spanNearQuery2 + ); + addQuery(spanOrQuery, 
documents); + + SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery); + addQuery(spanNotQuery, documents); + + long lowerLong = randomIntBetween(0, 256); + long upperLong = lowerLong + randomIntBetween(0, 32); + addQuery(LongPoint.newRangeQuery("long_field", lowerLong, upperLong), documents); + + indexWriter.addDocuments(documents); + indexWriter.close(); + directoryReader = DirectoryReader.open(directory); + IndexSearcher shardSearcher = newSearcher(directoryReader); + // Disable query cache, because ControlQuery cannot be cached... + shardSearcher.setQueryCache(null); + + Document document = new Document(); + document.add(new TextField("field", "the quick brown fox jumps over the lazy dog", Field.Store.NO)); + long randomLong = randomIntBetween((int) lowerLong, (int) upperLong); + document.add(new LongPoint("long_field", randomLong)); + MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer()); + duelRun(queryStore, memoryIndex, shardSearcher); + } + + private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException { + boolean requireScore = randomBoolean(); + IndexSearcher percolateSearcher = memoryIndex.createSearcher(); + Query percolateQuery = fieldType.percolateQuery("type", queryStore, new BytesArray("{}"), percolateSearcher); + Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery); + TopDocs topDocs = shardSearcher.search(query, 10); + + Query controlQuery = new ControlQuery(memoryIndex, queryStore); + controlQuery = requireScore ? 
controlQuery : new ConstantScoreQuery(controlQuery); + TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10); + assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits)); + assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length)); + for (int j = 0; j < topDocs.scoreDocs.length; j++) { + assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc)); + assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score)); + if (requireScore) { + Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc); + Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc); + assertThat(explain1.isMatch(), equalTo(explain2.isMatch())); + assertThat(explain1.getValue(), equalTo(explain2.getValue())); + } + } + } + + private void addQuery(Query query, List docs) throws IOException { + ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, + mapperService.documentMapperParser(), documentMapper, null, null); + fieldMapper.processQuery(query, parseContext); + docs.add(parseContext.doc()); + queries.add(query); + } + + private static final class CustomQuery extends Query { + + private final Term term; + + private CustomQuery(Term term) { + this.term = term; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + return new TermQuery(term); + } + + @Override + public String toString(String field) { + return "custom{" + field + "}"; + } + + @Override + public boolean equals(Object obj) { + return sameClassAs(obj); + } + + @Override + public int hashCode() { + return classHash(); + } + } + + private static final class ControlQuery extends Query { + + private final MemoryIndex memoryIndex; + private final PercolateQuery.QueryStore queryStore; + + private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) { + this.memoryIndex = memoryIndex; + this.queryStore = 
queryStore; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) { + return new ConstantScoreWeight(this) { + + float _score; + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + Scorer scorer = scorer(context); + if (scorer != null) { + int result = scorer.iterator().advance(doc); + if (result == doc) { + return Explanation.match(scorer.score(), "ControlQuery"); + } + } + return Explanation.noMatch("ControlQuery"); + } + + @Override + public String toString() { + return "weight(" + ControlQuery.this + ")"; + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc()); + PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context); + FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) { + + @Override + protected boolean match(int doc) { + try { + Query query = leaf.getQuery(doc); + float score = memoryIndex.search(query); + if (score != 0f) { + if (needsScores) { + _score = score; + } + return true; + } else { + return false; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }; + return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) { + + @Override + public float score() throws IOException { + return _score; + } + }; + } + }; + } + + @Override + public String toString(String field) { + return "control{" + field + "}"; + } + + @Override + public boolean equals(Object obj) { + return sameClassAs(obj); + } + + @Override + public int hashCode() { + return classHash(); + } + + } + +} diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryTests.java index c2c2a641a71..eda9ce15c73 100644 --- 
a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryTests.java @@ -21,60 +21,36 @@ package org.elasticsearch.percolator; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.memory.MemoryIndex; -import org.apache.lucene.queries.BlendedTermQuery; -import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.FilterScorer; -import org.apache.lucene.search.FilteredDocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.Weight; -import org.apache.lucene.search.WildcardQuery; import 
org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanNotQuery; -import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.index.mapper.Uid; -import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.test.ESTestCase; import org.junit.After; import org.junit.Before; -import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; +import java.util.Collections; import java.util.List; -import java.util.Map; -import java.util.function.Function; import static org.hamcrest.Matchers.arrayWithSize; import static org.hamcrest.Matchers.equalTo; @@ -82,34 +58,13 @@ import static org.hamcrest.Matchers.is; public class PercolateQueryTests extends ESTestCase { - public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms"; - public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query"; - public static final FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType(); - - static { - EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false); - EXTRACTED_TERMS_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); - EXTRACTED_TERMS_FIELD_TYPE.freeze(); - } - private Directory directory; private IndexWriter indexWriter; - private Map queries; - private PercolateQuery.QueryStore queryStore; private DirectoryReader directoryReader; @Before public void init() throws Exception { directory = newDirectory(); - queries = new HashMap<>(); - queryStore = ctx -> docId -> { - try { - String val = ctx.reader().document(docId).get(UidFieldMapper.NAME); - return queries.get(Uid.createUid(val).id()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }; IndexWriterConfig 
config = new IndexWriterConfig(new WhitespaceAnalyzer()); config.setMergePolicy(NoMergePolicy.INSTANCE); indexWriter = new IndexWriter(directory, config); @@ -121,31 +76,38 @@ public class PercolateQueryTests extends ESTestCase { directory.close(); } - public void testVariousQueries() throws Exception { - addPercolatorQuery("1", new TermQuery(new Term("field", "brown"))); - addPercolatorQuery("2", new TermQuery(new Term("field", "monkey"))); - addPercolatorQuery("3", new TermQuery(new Term("field", "fox"))); - BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); - bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD); - bq1.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD); - addPercolatorQuery("4", bq1.build()); - BooleanQuery.Builder bq2 = new BooleanQuery.Builder(); - bq2.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); - bq2.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST); - addPercolatorQuery("5", bq2.build()); - BooleanQuery.Builder bq3 = new BooleanQuery.Builder(); - bq3.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); - bq3.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT); - addPercolatorQuery("6", bq3.build()); - BooleanQuery.Builder bq4 = new BooleanQuery.Builder(); - bq4.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT); - bq4.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST); - addPercolatorQuery("7", bq4.build()); - PhraseQuery.Builder pq1 = new PhraseQuery.Builder(); - pq1.add(new Term("field", "lazy")); - pq1.add(new Term("field", "dog")); - addPercolatorQuery("8", pq1.build()); + public void testPercolateQuery() throws Exception { + List> docs = new ArrayList<>(); + List queries = new ArrayList<>(); + PercolateQuery.QueryStore queryStore = ctx -> queries::get; + queries.add(new TermQuery(new Term("field", "fox"))); + docs.add(Collections.singleton(new 
StringField("select", "a", Field.Store.NO))); + + SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true); + snp.addClause(new SpanTermQuery(new Term("field", "jumps"))); + snp.addClause(new SpanTermQuery(new Term("field", "lazy"))); + snp.addClause(new SpanTermQuery(new Term("field", "dog"))); + snp.setSlop(2); + queries.add(snp.build()); + docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO))); + + PhraseQuery.Builder pq1 = new PhraseQuery.Builder(); + pq1.add(new Term("field", "quick")); + pq1.add(new Term("field", "brown")); + pq1.add(new Term("field", "jumps")); + pq1.setSlop(1); + queries.add(pq1.build()); + docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO))); + + BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); + bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST); + bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST); + bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); + queries.add(bq1.build()); + docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO))); + + indexWriter.addDocuments(docs); indexWriter.close(); directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); @@ -153,26 +115,26 @@ public class PercolateQueryTests extends ESTestCase { MemoryIndex memoryIndex = new MemoryIndex(); memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); IndexSearcher percolateSearcher = memoryIndex.createSearcher(); - - PercolateQuery.Builder builder = new PercolateQuery.Builder( - "docType", - queryStore, - new BytesArray("{}"), - percolateSearcher - ); - builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME); // no scoring, wrapping it in a constant score query: - Query query = new ConstantScoreQuery(builder.build()); + Query query = new ConstantScoreQuery(new 
PercolateQuery("type", queryStore, new BytesArray("a"), + new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery(""))); TopDocs topDocs = shardSearcher.search(query, 10); - assertThat(topDocs.totalHits, equalTo(5)); - assertThat(topDocs.scoreDocs.length, equalTo(5)); + assertThat(topDocs.totalHits, equalTo(1)); + assertThat(topDocs.scoreDocs.length, equalTo(1)); assertThat(topDocs.scoreDocs[0].doc, equalTo(0)); Explanation explanation = shardSearcher.explain(query, 0); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score)); + query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"), + new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery(""))); + topDocs = shardSearcher.search(query, 10); + assertThat(topDocs.totalHits, equalTo(3)); + assertThat(topDocs.scoreDocs.length, equalTo(3)); + assertThat(topDocs.scoreDocs[0].doc, equalTo(1)); explanation = shardSearcher.explain(query, 1); - assertThat(explanation.isMatch(), is(false)); + assertThat(explanation.isMatch(), is(true)); + assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score)); assertThat(topDocs.scoreDocs[1].doc, equalTo(2)); explanation = shardSearcher.explain(query, 2); @@ -180,371 +142,37 @@ public class PercolateQueryTests extends ESTestCase { assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score)); assertThat(topDocs.scoreDocs[2].doc, equalTo(3)); - explanation = shardSearcher.explain(query, 3); + explanation = shardSearcher.explain(query, 2); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score)); - explanation = shardSearcher.explain(query, 4); - assertThat(explanation.isMatch(), is(false)); + query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"), + new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery())); + topDocs = 
shardSearcher.search(query, 10); + assertThat(topDocs.totalHits, equalTo(4)); - assertThat(topDocs.scoreDocs[3].doc, equalTo(5)); - explanation = shardSearcher.explain(query, 5); - assertThat(explanation.isMatch(), is(true)); - assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[3].score)); - - explanation = shardSearcher.explain(query, 6); - assertThat(explanation.isMatch(), is(false)); - - assertThat(topDocs.scoreDocs[4].doc, equalTo(7)); - explanation = shardSearcher.explain(query, 7); - assertThat(explanation.isMatch(), is(true)); - assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[4].score)); - } - - public void testVariousQueries_withScoring() throws Exception { - SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true); - snp.addClause(new SpanTermQuery(new Term("field", "jumps"))); - snp.addClause(new SpanTermQuery(new Term("field", "lazy"))); - snp.addClause(new SpanTermQuery(new Term("field", "dog"))); - snp.setSlop(2); - addPercolatorQuery("1", snp.build()); - PhraseQuery.Builder pq1 = new PhraseQuery.Builder(); - pq1.add(new Term("field", "quick")); - pq1.add(new Term("field", "brown")); - pq1.add(new Term("field", "jumps")); - pq1.setSlop(1); - addPercolatorQuery("2", pq1.build()); - BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); - bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST); - bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST); - bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); - addPercolatorQuery("3", bq1.build()); - - indexWriter.close(); - directoryReader = DirectoryReader.open(directory); - IndexSearcher shardSearcher = newSearcher(directoryReader); - - MemoryIndex memoryIndex = new MemoryIndex(); - memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); - IndexSearcher percolateSearcher = memoryIndex.createSearcher(); - - PercolateQuery.Builder builder = new 
PercolateQuery.Builder( - "docType", - queryStore, - new BytesArray("{}"), - percolateSearcher - ); - builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME); - Query query = builder.build(); - TopDocs topDocs = shardSearcher.search(query, 10); + query = new PercolateQuery("type", queryStore, new BytesArray("{}"), new TermQuery(new Term("select", "b")), + percolateSearcher, new MatchNoDocsQuery("")); + topDocs = shardSearcher.search(query, 10); assertThat(topDocs.totalHits, equalTo(3)); - - assertThat(topDocs.scoreDocs[0].doc, equalTo(2)); - Explanation explanation = shardSearcher.explain(query, 2); + assertThat(topDocs.scoreDocs.length, equalTo(3)); + assertThat(topDocs.scoreDocs[0].doc, equalTo(3)); + explanation = shardSearcher.explain(query, 3); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score)); assertThat(explanation.getDetails(), arrayWithSize(1)); - assertThat(topDocs.scoreDocs[1].doc, equalTo(1)); - explanation = shardSearcher.explain(query, 1); + assertThat(topDocs.scoreDocs[1].doc, equalTo(2)); + explanation = shardSearcher.explain(query, 2); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score)); assertThat(explanation.getDetails(), arrayWithSize(1)); - assertThat(topDocs.scoreDocs[2].doc, equalTo(0)); - explanation = shardSearcher.explain(query, 0); + assertThat(topDocs.scoreDocs[2].doc, equalTo(1)); + explanation = shardSearcher.explain(query, 1); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score)); assertThat(explanation.getDetails(), arrayWithSize(1)); } - public void testDuel() throws Exception { - List> queries = new ArrayList<>(); - queries.add((id) -> new PrefixQuery(new Term("field", id))); - queries.add((id) -> new WildcardQuery(new Term("field", id + "*"))); - queries.add((id) -> new CustomQuery(new Term("field", id))); - 
queries.add((id) -> new SpanTermQuery(new Term("field", id))); - queries.add((id) -> new TermQuery(new Term("field", id))); - queries.add((id) -> { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - return builder.build(); - }); - queries.add((id) -> { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST); - if (randomBoolean()) { - builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); - } - if (randomBoolean()) { - builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST); - } - return builder.build(); - }); - queries.add((id) -> { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); - if (randomBoolean()) { - builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); - } - if (randomBoolean()) { - builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); - } - return builder.build(); - }); - queries.add((id) -> { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); - builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); - if (randomBoolean()) { - builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); - } - return builder.build(); - }); - queries.add((id) -> { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); - builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); - if (randomBoolean()) { - builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); - } - return builder.build(); - }); - queries.add((id) -> { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4)); - builder.add(new TermQuery(new Term("field", id)), 
BooleanClause.Occur.SHOULD); - builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); - return builder.build(); - }); - queries.add((id) -> new MatchAllDocsQuery()); - queries.add((id) -> new MatchNoDocsQuery("no reason at all")); - - int numDocs = randomIntBetween(queries.size(), queries.size() * 3); - for (int i = 0; i < numDocs; i++) { - String id = Integer.toString(i); - addPercolatorQuery(id, queries.get(i % queries.size()).apply(id)); - } - - indexWriter.close(); - directoryReader = DirectoryReader.open(directory); - IndexSearcher shardSearcher = newSearcher(directoryReader); - // Disable query cache, because ControlQuery cannot be cached... - shardSearcher.setQueryCache(null); - - for (int i = 0; i < numDocs; i++) { - String id = Integer.toString(i); - MemoryIndex memoryIndex = new MemoryIndex(); - memoryIndex.addField("field", id, new WhitespaceAnalyzer()); - duelRun(memoryIndex, shardSearcher); - } - - MemoryIndex memoryIndex = new MemoryIndex(); - memoryIndex.addField("field", "value", new WhitespaceAnalyzer()); - duelRun(memoryIndex, shardSearcher); - // Empty percolator doc: - memoryIndex = new MemoryIndex(); - duelRun(memoryIndex, shardSearcher); - } - - public void testDuelSpecificQueries() throws Exception { - CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128); - commonTermsQuery.add(new Term("field", "quick")); - commonTermsQuery.add(new Term("field", "brown")); - commonTermsQuery.add(new Term("field", "fox")); - addPercolatorQuery("_id1", commonTermsQuery); - - BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery(new Term[]{new Term("field", "quick"), - new Term("field", "brown"), new Term("field", "fox")}, false); - addPercolatorQuery("_id2", blendedTermQuery); - - SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true) - .addClause(new SpanTermQuery(new Term("field", "quick"))) - .addClause(new SpanTermQuery(new 
Term("field", "brown"))) - .addClause(new SpanTermQuery(new Term("field", "fox"))) - .build(); - addPercolatorQuery("_id3", spanNearQuery); - - SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true) - .addClause(new SpanTermQuery(new Term("field", "the"))) - .addClause(new SpanTermQuery(new Term("field", "lazy"))) - .addClause(new SpanTermQuery(new Term("field", "doc"))) - .build(); - SpanOrQuery spanOrQuery = new SpanOrQuery( - spanNearQuery, - spanNearQuery2 - ); - addPercolatorQuery("_id4", spanOrQuery); - - SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery); - addPercolatorQuery("_id5", spanNotQuery); - - indexWriter.close(); - directoryReader = DirectoryReader.open(directory); - IndexSearcher shardSearcher = newSearcher(directoryReader); - // Disable query cache, because ControlQuery cannot be cached... - shardSearcher.setQueryCache(null); - - MemoryIndex memoryIndex = new MemoryIndex(); - memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); - duelRun(memoryIndex, shardSearcher); - } - - void addPercolatorQuery(String id, Query query, String... 
extraFields) throws IOException { - queries.put(id, query); - ParseContext.Document document = new ParseContext.Document(); - ExtractQueryTermsService.extractQueryTerms(query, document, EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME, - EXTRACTED_TERMS_FIELD_TYPE); - document.add(new StoredField(UidFieldMapper.NAME, Uid.createUid(MapperService.PERCOLATOR_LEGACY_TYPE_NAME, id))); - assert extraFields.length % 2 == 0; - for (int i = 0; i < extraFields.length; i++) { - document.add(new StringField(extraFields[i], extraFields[++i], Field.Store.NO)); - } - indexWriter.addDocument(document); - } - - private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException { - boolean requireScore = randomBoolean(); - IndexSearcher percolateSearcher = memoryIndex.createSearcher(); - PercolateQuery.Builder builder = new PercolateQuery.Builder( - "docType", - queryStore, - new BytesArray("{}"), - percolateSearcher - ); - // enables the optimization that prevents queries from being evaluated that don't match - builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME); - Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build()); - TopDocs topDocs = shardSearcher.search(query, 10); - - Query controlQuery = new ControlQuery(memoryIndex, queryStore); - controlQuery = requireScore ? 
controlQuery : new ConstantScoreQuery(controlQuery); - TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10); - assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits)); - assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length)); - for (int j = 0; j < topDocs.scoreDocs.length; j++) { - assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc)); - assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score)); - if (requireScore) { - Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc); - Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc); - assertThat(explain1.isMatch(), equalTo(explain2.isMatch())); - assertThat(explain1.getValue(), equalTo(explain2.getValue())); - } - } - } - - private static final class CustomQuery extends Query { - - private final Term term; - - private CustomQuery(Term term) { - this.term = term; - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - return new TermQuery(term); - } - - @Override - public String toString(String field) { - return "custom{" + field + "}"; - } - - @Override - public boolean equals(Object obj) { - return sameClassAs(obj); - } - - @Override - public int hashCode() { - return classHash(); - } - } - - private static final class ControlQuery extends Query { - - private final MemoryIndex memoryIndex; - private final PercolateQuery.QueryStore queryStore; - - private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) { - this.memoryIndex = memoryIndex; - this.queryStore = queryStore; - } - - @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) { - return new ConstantScoreWeight(this) { - - float _score; - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); - if (scorer != null) { - int result = 
scorer.iterator().advance(doc); - if (result == doc) { - return Explanation.match(scorer.score(), "ControlQuery"); - } - } - return Explanation.noMatch("ControlQuery"); - } - - @Override - public String toString() { - return "weight(" + ControlQuery.this + ")"; - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc()); - PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context); - FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) { - - @Override - protected boolean match(int doc) { - try { - Query query = leaf.getQuery(doc); - float score = memoryIndex.search(query); - if (score != 0f) { - if (needsScores) { - _score = score; - } - return true; - } else { - return false; - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - }; - return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) { - - @Override - public float score() throws IOException { - return _score; - } - }; - } - }; - } - - @Override - public String toString(String field) { - return "control{" + field + "}"; - } - - @Override - public boolean equals(Object obj) { - return sameClassAs(obj); - } - - @Override - public int hashCode() { - return classHash(); - } - - } - } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java index d6221184b6e..57dda2f55cd 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java @@ -19,26 +19,51 @@ package org.elasticsearch.percolator; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.LongPoint; +import 
org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.PrefixCodedTerms; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.BoostingQueryBuilder; +import org.elasticsearch.index.query.ConstantScoreQueryBuilder; +import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.elasticsearch.index.query.functionscore.RandomScoreFunctionBuilder; import org.elasticsearch.indices.TermsLookup; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import 
java.util.Collections; +import java.util.List; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; @@ -49,8 +74,9 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery; import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery; import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery; -import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE; -import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED; +import static org.elasticsearch.percolator.PercolatorFieldMapper.EXTRACTION_COMPLETE; +import static org.elasticsearch.percolator.PercolatorFieldMapper.EXTRACTION_FAILED; +import static org.elasticsearch.percolator.PercolatorFieldMapper.EXTRACTION_PARTIAL; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -61,7 +87,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { private String fieldName; private IndexService indexService; private MapperService mapperService; - private PercolatorFieldMapper.PercolatorFieldType fieldType; + private PercolatorFieldMapper.FieldType fieldType; @Override protected Collection> getPlugins() { @@ -77,6 +103,10 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { .startObject("_field_names").field("enabled", false).endObject() // makes testing easier .startObject("properties") .startObject("field").field("type", "text").endObject() + .startObject("field1").field("type", "text").endObject() + .startObject("field2").field("type", "text").endObject() + .startObject("_field3").field("type", "text").endObject() + .startObject("field4").field("type", "text").endObject() .startObject("number_field").field("type", "long").endObject() 
.startObject("date_field").field("type", "date").endObject() .endObject().endObject().endObject().string(); @@ -90,7 +120,101 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { .startObject("properties").startObject(fieldName).field("type", "percolator").endObject().endObject() .endObject().endObject().string(); mapperService.merge(typeName, new CompressedXContent(percolatorMapper), MapperService.MergeReason.MAPPING_UPDATE, true); - fieldType = (PercolatorFieldMapper.PercolatorFieldType) mapperService.fullName(fieldName); + fieldType = (PercolatorFieldMapper.FieldType) mapperService.fullName(fieldName); + } + + public void testExtractTerms() throws Exception { + addQueryMapping(); + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + TermQuery termQuery1 = new TermQuery(new Term("field", "term1")); + bq.add(termQuery1, BooleanClause.Occur.SHOULD); + TermQuery termQuery2 = new TermQuery(new Term("field", "term2")); + bq.add(termQuery2, BooleanClause.Occur.SHOULD); + + DocumentMapper documentMapper = mapperService.documentMapper(typeName); + PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName); + ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, + mapperService.documentMapperParser(), documentMapper, null, null); + fieldMapper.processQuery(bq.build(), parseContext); + ParseContext.Document document = parseContext.doc(); + + PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_COMPLETE)); + List fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.queryTermsField.name()))); + Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue())); + assertThat(fields.size(), equalTo(2)); + assertThat(fields.get(0).binaryValue().utf8ToString(), 
equalTo("field\u0000term1")); + assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field\u0000term2")); + } + + public void testExtractTermsAndRanges_failed() throws Exception { + addQueryMapping(); + TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true); + DocumentMapper documentMapper = mapperService.documentMapper(typeName); + PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName); + ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, + mapperService.documentMapperParser(), documentMapper, null, null); + fieldMapper.processQuery(query, parseContext); + ParseContext.Document document = parseContext.doc(); + + PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getFields().size(), equalTo(1)); + assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_FAILED)); + } + + public void testExtractTermsAndRanges_partial() throws Exception { + addQueryMapping(); + PhraseQuery phraseQuery = new PhraseQuery("field", "term"); + DocumentMapper documentMapper = mapperService.documentMapper(typeName); + PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName); + ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, + mapperService.documentMapperParser(), documentMapper, null, null); + fieldMapper.processQuery(phraseQuery, parseContext); + ParseContext.Document document = parseContext.doc(); + + PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType(); + assertThat(document.getFields().size(), equalTo(2)); + assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term")); + 
assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_PARTIAL)); + } + + public void testCreateCandidateQuery() throws Exception { + addQueryMapping(); + + MemoryIndex memoryIndex = new MemoryIndex(false); + memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); + memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer()); + memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer()); + memoryIndex.addField("field4", "123", new WhitespaceAnalyzer()); + memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer()); + + IndexReader indexReader = memoryIndex.createSearcher().getIndexReader(); + + TermsQuery termsQuery = (TermsQuery) fieldType.createCandidateQuery(indexReader); + + PrefixCodedTerms terms = termsQuery.getTermData(); + assertThat(terms.size(), equalTo(15L)); + PrefixCodedTerms.TermIterator termIterator = terms.iterator(); + assertTermIterator(termIterator, "_field3\u0000me", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "_field3\u0000unhide", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000brown", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000dog", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000fox", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000jumps", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000lazy", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000over", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000quick", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field1\u0000the", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field2\u0000more", fieldType.queryTermsField.name()); + 
assertTermIterator(termIterator, "field2\u0000some", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field2\u0000text", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, "field4\u0000123", fieldType.queryTermsField.name()); + assertTermIterator(termIterator, EXTRACTION_FAILED, fieldType.extractionResultField.name()); + } + + private void assertTermIterator(PrefixCodedTerms.TermIterator termIterator, String expectedValue, String expectedField) { + assertThat(termIterator.next().utf8ToString(), equalTo(expectedValue)); + assertThat(termIterator.field(), equalTo(expectedField)); } public void testPercolatorFieldMapper() throws Exception { @@ -100,12 +224,13 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { .field(fieldName, queryBuilder) .endObject().bytes()); - assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1)); - assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value")); - assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1)); - assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1)); - assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE)); - BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue(); + assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name()).length, equalTo(1)); + assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name())[0].binaryValue().utf8ToString(), equalTo("field\0value")); + assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1)); + assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1)); + assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(), + 
equalTo(EXTRACTION_COMPLETE)); + BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue(); assertQueryBuilder(qbSource, queryBuilder); // add an query for which we don't extract terms from @@ -113,11 +238,12 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject() .field(fieldName, queryBuilder) .endObject().bytes()); - assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1)); - assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED)); - assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0)); - assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1)); - qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue(); + assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1)); + assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(), + equalTo(EXTRACTION_FAILED)); + assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name()).length, equalTo(0)); + assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1)); + qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue(); assertQueryBuilder(qbSource, queryBuilder); } @@ -136,7 +262,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { XContentFactory.jsonBuilder().startObject() .field(fieldName, query) .endObject().bytes()); - BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue(); + BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue(); assertQueryBuilder(qbSource, query); } } @@ -148,7 +274,7 @@ 
public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject() .field(fieldName, queryBuilder) .endObject().bytes()); - BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue(); + BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue(); assertQueryBuilder(qbSource, queryBuilder.rewrite(indexService.newQueryShardContext())); } @@ -169,7 +295,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { addQueryMapping(); ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject() .endObject().bytes()); - assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(0)); + assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(0)); try { mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject() @@ -275,6 +401,53 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase { assertThat(e.getCause().getMessage(), equalTo("a document can only contain one percolator query")); } + public void testRangeQueryWithNowRangeIsForbidden() throws Exception { + addQueryMapping(); + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapperService.documentMapper(typeName).parse("test", typeName, "1", + jsonBuilder().startObject() + .field(fieldName, rangeQuery("date_field").from("2016-01-01||/D").to("now")) + .endObject().bytes()); + } + ); + assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); + e = expectThrows(MapperParsingException.class, () -> { + mapperService.documentMapper(typeName).parse("test", typeName, "1", + jsonBuilder().startObject() + .field(fieldName, 
rangeQuery("date_field").from("2016-01-01||/D").to("now/D")) + .endObject().bytes()); + } + ); + assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); + e = expectThrows(MapperParsingException.class, () -> { + mapperService.documentMapper(typeName).parse("test", typeName, "1", + jsonBuilder().startObject() + .field(fieldName, rangeQuery("date_field").from("now-1d").to("now")) + .endObject().bytes()); + } + ); + assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); + } + + public void testVerifyRangeQueries() { + RangeQueryBuilder rangeQuery1 = new RangeQueryBuilder("field").from("2016-01-01||/D").to("2017-01-01||/D"); + RangeQueryBuilder rangeQuery2 = new RangeQueryBuilder("field").from("2016-01-01||/D").to("now"); + PercolatorFieldMapper.verifyRangeQueries(rangeQuery1); + expectThrows(IllegalArgumentException.class, () -> PercolatorFieldMapper.verifyRangeQueries(rangeQuery2)); + PercolatorFieldMapper.verifyRangeQueries(new BoolQueryBuilder().must(rangeQuery1)); + expectThrows(IllegalArgumentException.class, () -> + PercolatorFieldMapper.verifyRangeQueries(new BoolQueryBuilder().must(rangeQuery2))); + PercolatorFieldMapper.verifyRangeQueries(new ConstantScoreQueryBuilder((rangeQuery1))); + expectThrows(IllegalArgumentException.class, () -> + PercolatorFieldMapper.verifyRangeQueries(new ConstantScoreQueryBuilder(rangeQuery2))); + PercolatorFieldMapper.verifyRangeQueries(new BoostingQueryBuilder(rangeQuery1, new MatchAllQueryBuilder())); + expectThrows(IllegalArgumentException.class, () -> + PercolatorFieldMapper.verifyRangeQueries(new BoostingQueryBuilder(rangeQuery2, new MatchAllQueryBuilder()))); + PercolatorFieldMapper.verifyRangeQueries(new FunctionScoreQueryBuilder(rangeQuery1, new RandomScoreFunctionBuilder())); + expectThrows(IllegalArgumentException.class, () -> + PercolatorFieldMapper.verifyRangeQueries(new FunctionScoreQueryBuilder(rangeQuery2, new RandomScoreFunctionBuilder()))); + } + private void 
assertQueryBuilder(BytesRef actual, QueryBuilder expected) throws IOException { XContentParser sourceParser = PercolatorFieldMapper.QUERY_BUILDER_CONTENT_TYPE.xContent() .createParser(actual.bytes, actual.offset, actual.length); diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhaseTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhaseTests.java index 69ef623f801..0870fb2b32c 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhaseTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhaseTests.java @@ -45,10 +45,9 @@ import static org.hamcrest.Matchers.sameInstance; public class PercolatorHighlightSubFetchPhaseTests extends ESTestCase { public void testHitsExecutionNeeded() { - PercolateQuery percolateQuery = new PercolateQuery.Builder("", ctx -> null, new BytesArray("{}"), - Mockito.mock(IndexSearcher.class)) - .build(); - + PercolateQuery percolateQuery = new PercolateQuery( + "", ctx -> null, new BytesArray("{}"), new MatchAllDocsQuery(), Mockito.mock(IndexSearcher.class), new MatchAllDocsQuery() + ); PercolatorHighlightSubFetchPhase subFetchPhase = new PercolatorHighlightSubFetchPhase(Settings.EMPTY, new Highlighters(Settings.EMPTY)); SearchContext searchContext = Mockito.mock(SearchContext.class); @@ -61,10 +60,9 @@ public class PercolatorHighlightSubFetchPhaseTests extends ESTestCase { } public void testLocatePercolatorQuery() { - PercolateQuery percolateQuery = new PercolateQuery.Builder("", ctx -> null, new BytesArray("{}"), - Mockito.mock(IndexSearcher.class)) - .build(); - + PercolateQuery percolateQuery = new PercolateQuery( + "", ctx -> null, new BytesArray("{}"), new MatchAllDocsQuery(), Mockito.mock(IndexSearcher.class), new MatchAllDocsQuery() + ); assertThat(PercolatorHighlightSubFetchPhase.locatePercolatorQuery(new MatchAllDocsQuery()), 
nullValue()); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER); diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorIT.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorIT.java index e4e379c8f60..bdfa49016e9 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorIT.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorIT.java @@ -1553,31 +1553,6 @@ public class PercolatorIT extends ESIntegTestCase { } } - public void testPercolatorQueryWithNowRange() throws Exception { - client().admin().indices().prepareCreate(INDEX_NAME) - .addMapping("my-type", "timestamp", "type=date,format=epoch_millis") - .addMapping(TYPE_NAME, "query", "type=percolator") - .get(); - ensureGreen(); - - client().prepareIndex(INDEX_NAME, TYPE_NAME, "1") - .setSource(jsonBuilder().startObject().field("query", rangeQuery("timestamp").from("now-1d").to("now")).endObject()) - .get(); - client().prepareIndex(INDEX_NAME, TYPE_NAME, "2") - .setSource(jsonBuilder().startObject().field("query", constantScoreQuery(rangeQuery("timestamp").from("now-1d").to("now"))).endObject()) - .get(); - refresh(); - - logger.info("--> Percolate doc with field1=b"); - PercolateResponse response = preparePercolate(client()) - .setIndices(INDEX_NAME).setDocumentType("my-type") - .setPercolateDoc(docBuilder().setDoc("timestamp", System.currentTimeMillis())) - .get(); - assertMatchCount(response, 2L); - assertThat(response.getMatches(), arrayWithSize(2)); - assertThat(convertFromTextArray(response.getMatches(), INDEX_NAME), arrayContainingInAnyOrder("1", "2")); - } - void initNestedIndexAndPercolation() throws IOException { XContentBuilder mapping = XContentFactory.jsonBuilder(); mapping.startObject().startObject("properties").startObject("companyname").field("type", "text").endObject() diff --git 
a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java index 5125a7ea5cc..3cc60d75cf2 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java @@ -44,6 +44,7 @@ import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery; +import static org.elasticsearch.index.query.QueryBuilders.rangeQuery; import static org.elasticsearch.index.query.QueryBuilders.spanNearQuery; import static org.elasticsearch.index.query.QueryBuilders.spanNotQuery; import static org.elasticsearch.index.query.QueryBuilders.spanTermQuery; @@ -109,6 +110,102 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase { assertThat(response.getHits().getAt(2).getId(), equalTo("3")); } + public void testPercolatorRangeQueries() throws Exception { + createIndex("test", client().admin().indices().prepareCreate("test") + .addMapping("type", "field1", "type=long", "field2", "type=double", "field3", "type=ip") + .addMapping("queries", "query", "type=percolator") + ); + + client().prepareIndex("test", "queries", "1") + .setSource(jsonBuilder().startObject().field("query", rangeQuery("field1").from(10).to(12)).endObject()) + .get(); + client().prepareIndex("test", "queries", "2") + .setSource(jsonBuilder().startObject().field("query", rangeQuery("field1").from(20).to(22)).endObject()) + .get(); + client().prepareIndex("test", "queries", "3") + .setSource(jsonBuilder().startObject().field("query", boolQuery() + .must(rangeQuery("field1").from(10).to(12)) + .must(rangeQuery("field1").from(12).to(14)) + 
).endObject()).get(); + client().admin().indices().prepareRefresh().get(); + client().prepareIndex("test", "queries", "4") + .setSource(jsonBuilder().startObject().field("query", rangeQuery("field2").from(10).to(12)).endObject()) + .get(); + client().prepareIndex("test", "queries", "5") + .setSource(jsonBuilder().startObject().field("query", rangeQuery("field2").from(20).to(22)).endObject()) + .get(); + client().prepareIndex("test", "queries", "6") + .setSource(jsonBuilder().startObject().field("query", boolQuery() + .must(rangeQuery("field2").from(10).to(12)) + .must(rangeQuery("field2").from(12).to(14)) + ).endObject()).get(); + client().admin().indices().prepareRefresh().get(); + client().prepareIndex("test", "queries", "7") + .setSource(jsonBuilder().startObject() + .field("query", rangeQuery("field3").from("192.168.1.0").to("192.168.1.5")) + .endObject()) + .get(); + client().prepareIndex("test", "queries", "8") + .setSource(jsonBuilder().startObject() + .field("query", rangeQuery("field3").from("192.168.1.20").to("192.168.1.30")) + .endObject()) + .get(); + client().prepareIndex("test", "queries", "9") + .setSource(jsonBuilder().startObject().field("query", boolQuery() + .must(rangeQuery("field3").from("192.168.1.0").to("192.168.1.5")) + .must(rangeQuery("field3").from("192.168.1.5").to("192.168.1.10")) + ).endObject()).get(); + client().admin().indices().prepareRefresh().get(); + + // Test long range: + BytesReference source = jsonBuilder().startObject().field("field1", 12).endObject().bytes(); + SearchResponse response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", "type", source)) + .get(); + assertHitCount(response, 2); + assertThat(response.getHits().getAt(0).getId(), equalTo("3")); + assertThat(response.getHits().getAt(1).getId(), equalTo("1")); + + source = jsonBuilder().startObject().field("field1", 11).endObject().bytes(); + response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", "type", source)) 
+ .get(); + assertHitCount(response, 1); + assertThat(response.getHits().getAt(0).getId(), equalTo("1")); + + // Test double range: + source = jsonBuilder().startObject().field("field2", 12).endObject().bytes(); + response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", "type", source)) + .get(); + assertHitCount(response, 2); + assertThat(response.getHits().getAt(0).getId(), equalTo("6")); + assertThat(response.getHits().getAt(1).getId(), equalTo("4")); + + source = jsonBuilder().startObject().field("field2", 11).endObject().bytes(); + response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", "type", source)) + .get(); + assertHitCount(response, 1); + assertThat(response.getHits().getAt(0).getId(), equalTo("4")); + + // Test IP range: + source = jsonBuilder().startObject().field("field3", "192.168.1.5").endObject().bytes(); + response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", "type", source)) + .get(); + assertHitCount(response, 2); + assertThat(response.getHits().getAt(0).getId(), equalTo("9")); + assertThat(response.getHits().getAt(1).getId(), equalTo("7")); + + source = jsonBuilder().startObject().field("field3", "192.168.1.4").endObject().bytes(); + response = client().prepareSearch() + .setQuery(new PercolateQueryBuilder("query", "type", source)) + .get(); + assertHitCount(response, 1); + assertThat(response.getHits().getAt(0).getId(), equalTo("7")); + } + public void testPercolatorQueryExistingDocument() throws Exception { createIndex("test", client().admin().indices().prepareCreate("test") .addMapping("type", "field1", "type=keyword", "field2", "type=keyword") diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/ExtractQueryTermsServiceTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java similarity index 74% rename from 
modules/percolator/src/test/java/org/elasticsearch/percolator/ExtractQueryTermsServiceTests.java rename to modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 069aac9eda2..1b8b123aa13 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/ExtractQueryTermsServiceTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -18,14 +18,7 @@ */ package org.elasticsearch.percolator; -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.Term; -import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.queries.TermsQuery; @@ -48,11 +41,9 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; import org.elasticsearch.common.lucene.search.function.RandomScoreFunction; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.percolator.ExtractQueryTermsService.Result; +import org.elasticsearch.percolator.QueryAnalyzer.Result; import org.elasticsearch.test.ESTestCase; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -60,70 +51,18 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE; -import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED; -import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL; -import static 
org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException; -import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms; -import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery; -import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm; +import static org.elasticsearch.percolator.QueryAnalyzer.UnsupportedQueryException; +import static org.elasticsearch.percolator.QueryAnalyzer.analyze; +import static org.elasticsearch.percolator.QueryAnalyzer.selectTermListWithTheLongestShortestTerm; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.sameInstance; -public class ExtractQueryTermsServiceTests extends ESTestCase { - - public static final String QUERY_TERMS_FIELD = "extracted_terms"; - public static final String EXTRACTION_RESULT_FIELD = "extraction_result"; - public static final FieldType QUERY_TERMS_FIELD_TYPE = new FieldType(); - - static { - QUERY_TERMS_FIELD_TYPE.setTokenized(false); - QUERY_TERMS_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); - QUERY_TERMS_FIELD_TYPE.freeze(); - } - - public void testExtractQueryMetadata() { - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - TermQuery termQuery1 = new TermQuery(new Term("field1", "term1")); - bq.add(termQuery1, BooleanClause.Occur.SHOULD); - TermQuery termQuery2 = new TermQuery(new Term("field2", "term2")); - bq.add(termQuery2, BooleanClause.Occur.SHOULD); - - ParseContext.Document document = new ParseContext.Document(); - extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE); - assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE)); - List fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD))); - Collections.sort(fields, (field1, field2) -> 
field1.binaryValue().compareTo(field2.binaryValue())); - assertThat(fields.size(), equalTo(2)); - assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD)); - assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1")); - assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD)); - assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2")); - } - - public void testExtractQueryMetadata_unsupported() { - TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true); - ParseContext.Document document = new ParseContext.Document(); - extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE); - assertThat(document.getFields().size(), equalTo(1)); - assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED)); - } - - public void testExtractQueryMetadata_notVerified() { - PhraseQuery phraseQuery = new PhraseQuery("field", "term"); - - ParseContext.Document document = new ParseContext.Document(); - extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE); - assertThat(document.getFields().size(), equalTo(2)); - assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD)); - assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term")); - assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL)); - } +public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_termQuery() { TermQuery termQuery = new TermQuery(new Term("_field", "_term")); - Result result = extractQueryTerms(termQuery); + Result result = analyze(termQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); assertThat(terms.size(), equalTo(1)); @@ -133,7 +72,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public 
void testExtractQueryMetadata_termsQuery() { TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2")); - Result result = extractQueryTerms(termsQuery); + Result result = analyze(termsQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -145,7 +84,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { // test with different fields termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2")); - result = extractQueryTerms(termsQuery); + result = analyze(termsQuery); assertThat(result.verified, is(true)); terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -158,7 +97,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public void testExtractQueryMetadata_phraseQuery() { PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2"); - Result result = extractQueryTerms(phraseQuery); + Result result = analyze(phraseQuery); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.terms); assertThat(terms.size(), equalTo(1)); @@ -181,7 +120,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); - Result result = extractQueryTerms(booleanQuery); + Result result = analyze(booleanQuery); assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false)); List terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -209,7 +148,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); - Result result = extractQueryTerms(booleanQuery); + Result result = analyze(booleanQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); 
Collections.sort(terms); @@ -232,7 +171,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(phraseQuery, BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); - Result result = extractQueryTerms(booleanQuery); + Result result = analyze(booleanQuery); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.terms); assertThat(terms.size(), equalTo(1)); @@ -246,58 +185,58 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(termQuery1, BooleanClause.Occur.SHOULD); TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2")); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - Result result = extractQueryTerms(builder.build()); + Result result = analyze(builder.build()); assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, BooleanClause.Occur.SHOULD); PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2"); builder.add(phraseQuery1, BooleanClause.Occur.SHOULD); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(phraseQuery1, BooleanClause.Occur.SHOULD); PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4"); builder.add(phraseQuery2, BooleanClause.Occur.SHOULD); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, BooleanClause.Occur.MUST_NOT); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("There is a must_not clause, so candidate matches are not 
verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32)); builder.add(termQuery1, BooleanClause.Occur.SHOULD); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("Minimum match is >= 1, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("Single required clause, so candidate matches are verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false)); builder = new BooleanQuery.Builder(); builder.add(termQuery1, randomBoolean() ? 
BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); builder.add(termQuery2, BooleanClause.Occur.MUST_NOT); - result = extractQueryTerms(builder.build()); + result = analyze(builder.build()); assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false)); } public void testExtractQueryMetadata_constantScoreQuery() { TermQuery termQuery1 = new TermQuery(new Term("_field", "_term")); ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1); - Result result = extractQueryTerms(constantScoreQuery); + Result result = analyze(constantScoreQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); assertThat(terms.size(), equalTo(1)); @@ -308,7 +247,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public void testExtractQueryMetadata_boostQuery() { TermQuery termQuery1 = new TermQuery(new Term("_field", "_term")); BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f); - Result result = extractQueryTerms(constantScoreQuery); + Result result = analyze(constantScoreQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); assertThat(terms.size(), equalTo(1)); @@ -320,7 +259,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100); commonTermsQuery.add(new Term("_field", "_term1")); commonTermsQuery.add(new Term("_field", "_term2")); - Result result = extractQueryTerms(commonTermsQuery); + Result result = analyze(commonTermsQuery); assertThat(result.verified, is(false)); List terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -334,7 +273,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public void testExtractQueryMetadata_blendedTermQuery() { Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")}; BlendedTermQuery 
commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false); - Result result = extractQueryTerms(commonTermsQuery); + Result result = analyze(commonTermsQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -358,7 +297,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { // 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); - Result result = extractQueryTerms(spanTermQuery1); + Result result = analyze(spanTermQuery1); assertThat(result.verified, is(true)); assertTermsEqual(result.terms, spanTermQuery1.getTerm()); } @@ -369,7 +308,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true) .addClause(spanTermQuery1).addClause(spanTermQuery2).build(); - Result result = extractQueryTerms(spanNearQuery); + Result result = analyze(spanNearQuery); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, spanTermQuery2.getTerm()); } @@ -378,7 +317,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term")); SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2); - Result result = extractQueryTerms(spanOrQuery); + Result result = analyze(spanOrQuery); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm()); } @@ -386,7 +325,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public void testExtractQueryMetadata_spanFirstQuery() { SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20); - Result result = 
extractQueryTerms(spanFirstQuery); + Result result = analyze(spanFirstQuery); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, spanTermQuery1.getTerm()); } @@ -395,27 +334,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term")); SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term")); SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2); - Result result = extractQueryTerms(spanNotQuery); + Result result = analyze(spanNotQuery); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, spanTermQuery1.getTerm()); } public void testExtractQueryMetadata_matchNoDocsQuery() { - Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all")); + Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all")); assertThat(result.verified, is(true)); assertEquals(0, result.terms.size()); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST); - result = extractQueryTerms(bq.build()); + result = analyze(bq.build()); assertThat(result.verified, is(false)); assertEquals(0, result.terms.size()); bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD); - result = extractQueryTerms(bq.build()); + result = analyze(bq.build()); assertThat(result.verified, is(true)); assertTermsEqual(result.terms, new Term("field", "value")); @@ -423,18 +362,18 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")), 1f ); 
- result = extractQueryTerms(disjunctionMaxQuery); + result = analyze(disjunctionMaxQuery); assertThat(result.verified, is(true)); assertTermsEqual(result.terms, new Term("field", "value")); } public void testExtractQueryMetadata_matchAllDocsQuery() { - expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery())); + expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery())); BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); - Result result = extractQueryTerms(builder.build()); + Result result = analyze(builder.build()); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, new Term("field", "value")); @@ -443,39 +382,39 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); BooleanQuery bq1 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq1)); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); BooleanQuery bq2 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq2)); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq3 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> 
extractQueryTerms(bq3)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq3)); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq4 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq4)); builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq5 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5)); + expectThrows(UnsupportedQueryException.class, () -> analyze(bq5)); } public void testExtractQueryMetadata_unsupportedQuery() { TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false); - UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(termRangeQuery)); + UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(termRangeQuery)); assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery)); TermQuery termQuery1 = new TermQuery(new Term("_field", "_term")); @@ -484,7 +423,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(termRangeQuery, BooleanClause.Occur.SHOULD); BooleanQuery bq = builder.build(); - e = expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq)); + e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq)); assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery)); } @@ -497,7 +436,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(unsupportedQuery, BooleanClause.Occur.MUST); BooleanQuery bq1 = builder.build(); - Result result = 
extractQueryTerms(bq1); + Result result = analyze(bq1); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, termQuery1.getTerm()); @@ -507,7 +446,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(termQuery2, BooleanClause.Occur.MUST); builder.add(unsupportedQuery, BooleanClause.Occur.MUST); bq1 = builder.build(); - result = extractQueryTerms(bq1); + result = analyze(bq1); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, termQuery2.getTerm()); @@ -515,7 +454,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { builder.add(unsupportedQuery, BooleanClause.Occur.MUST); builder.add(unsupportedQuery, BooleanClause.Occur.MUST); BooleanQuery bq2 = builder.build(); - UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2)); + UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq2)); assertThat(e.getUnsupportedQuery(), sameInstance(unsupportedQuery)); } @@ -528,7 +467,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f ); - Result result = extractQueryTerms(disjunctionMaxQuery); + Result result = analyze(disjunctionMaxQuery); assertThat(result.verified, is(true)); List terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -546,7 +485,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f ); - result = extractQueryTerms(disjunctionMaxQuery); + result = analyze(disjunctionMaxQuery); assertThat(result.verified, is(false)); terms = new ArrayList<>(result.terms); Collections.sort(terms); @@ -563,12 +502,12 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public void testSynonymQuery() { SynonymQuery query = new SynonymQuery(); - Result result = extractQueryTerms(query); + Result result = 
analyze(query); assertThat(result.verified, is(true)); assertThat(result.terms.isEmpty(), is(true)); query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2")); - result = extractQueryTerms(query); + result = analyze(query); assertThat(result.verified, is(true)); assertTermsEqual(result.terms, new Term("_field", "_value1"), new Term("_field", "_value2")); } @@ -576,47 +515,16 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { public void testFunctionScoreQuery() { TermQuery termQuery = new TermQuery(new Term("_field", "_value")); FunctionScoreQuery functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction()); - Result result = extractQueryTerms(functionScoreQuery); + Result result = analyze(functionScoreQuery); assertThat(result.verified, is(true)); assertTermsEqual(result.terms, new Term("_field", "_value")); functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(), 1f, null, 10f); - result = extractQueryTerms(functionScoreQuery); + result = analyze(functionScoreQuery); assertThat(result.verified, is(false)); assertTermsEqual(result.terms, new Term("_field", "_value")); } - public void testCreateQueryMetadataQuery() throws Exception { - MemoryIndex memoryIndex = new MemoryIndex(false); - memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); - memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer()); - memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer()); - memoryIndex.addField("field4", "123", new WhitespaceAnalyzer()); - - IndexReader indexReader = memoryIndex.createSearcher().getIndexReader(); - TermsQuery query = (TermsQuery) - createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED)); - - PrefixCodedTerms terms = query.getTermData(); - assertThat(terms.size(), equalTo(15L)); - PrefixCodedTerms.TermIterator termIterator = terms.iterator(); 
- assertTermIterator(termIterator, "_field3\u0000me", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "_field3\u0000unhide", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000brown", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000dog", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000fox", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000jumps", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000lazy", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000over", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000quick", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field1\u0000the", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field2\u0000more", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD); - assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD); - } - public void testSelectTermsListWithHighestSumOfTermLength() { Set terms1 = new HashSet<>(); int shortestTerms1Length = Integer.MAX_VALUE; @@ -643,11 +551,6 @@ public class ExtractQueryTermsServiceTests extends ESTestCase { assertThat(result, sameInstance(expected)); } - private void assertTermIterator(PrefixCodedTerms.TermIterator termIterator, String expectedValue, String expectedField) { - assertThat(termIterator.next().utf8ToString(), equalTo(expectedValue)); - assertThat(termIterator.field(), equalTo(expectedField)); - } - private static void assertTermsEqual(Set actual, Term... expected) { assertEquals(new HashSet<>(Arrays.asList(expected)), actual); }