percolator: Forbid the usage of `range` queries with a range based on the current time

If percolator queries contain `range` queries whose bounds are based on the current time, this can lead to incorrect results when the `percolate` query gets cached. Such ranges change each time the `percolate` query is executed, and if the query gets cached the results will be based on the range as it was at the time the `percolate` query got cached.
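For example, a percolator query like the following (a hedged sketch using the Java `QueryBuilders` API; the field name is illustrative) is now rejected at index time instead of silently returning stale matches once the `percolate` query is cached:

```java
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

// A range whose bounds move with the clock: "documents from the last hour".
// Registering this as a percolator query is now forbidden, because a cached
// percolate query would keep evaluating the range as it was at cache time.
QueryBuilder nowBasedRange = QueryBuilders.rangeQuery("created_at")
        .from("now-1h")
        .to("now");
```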

The `ExtractQueryTermsService` has been renamed to `QueryAnalyzer` and now only deals with analyzing the query (extracting terms and deciding whether the entire query is a verified match). The `PercolatorFieldMapper` is responsible for adding the right fields based on the analysis the `QueryAnalyzer` has performed, because this is highly dependent on the field mappings. The `PercolatorFieldMapper` is also responsible for creating the percolate query.
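A minimal sketch of the new division of labor (test-style driver code assumed; `analyze`, `Result.terms` and `Result.verified` are taken from this diff):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

// The QueryAnalyzer only analyzes: it extracts terms and decides whether a
// candidate match is already a verified (definitive) match.
Query query = new TermQuery(new Term("field", "fox"));
QueryAnalyzer.Result result = QueryAnalyzer.analyze(query);
// result.terms    -> {field:fox}
// result.verified -> true: a single term query needs no MemoryIndex verification
// PercolatorFieldMapper.processQuery(...) then maps this result onto the
// extracted_terms and extraction_result fields, as shown in the diff below.
```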
Martijn van Groningen 2016-07-08 12:32:45 +02:00
parent f6ac147b1d
commit ff5527f037
19 changed files with 1063 additions and 879 deletions

View File

@ -71,11 +71,14 @@ a percolator query does not exist, it will be handled as a default string field
fail.
[float]
==== Important Notes
==== Limitations
Because the `percolate` query processes one document at a time, it doesn't support queries and filters that run
against child documents, such as `has_child` and `has_parent`.
The percolator doesn't accept percolator queries containing `range` queries with ranges that are based on the current
time (using `now`).
There are a number of queries that fetch data via a get call during query parsing, for example the `terms` query when
using terms lookup, the `template` query when using indexed scripts and the `geo_shape` query when using pre-indexed shapes. When these
queries are indexed by the `percolator` field type then the get call is executed once. So each time the `percolator`

View File

@ -48,6 +48,11 @@ the existing document.
The percolate stats have been removed. This is because the percolator no longer caches the percolator queries.
==== Percolator queries containing range queries with now ranges
The percolator no longer accepts percolator queries containing `range` queries with ranges that are based on the
current time (using `now`).
==== Java client
The percolator is no longer part of the core elasticsearch dependency. It has moved to the percolator module.

View File

@ -21,6 +21,7 @@ package org.elasticsearch.percolator;
import org.elasticsearch.action.Action;
import org.elasticsearch.client.ElasticsearchClient;
@Deprecated
public class MultiPercolateAction extends Action<MultiPercolateRequest, MultiPercolateResponse, MultiPercolateRequestBuilder> {
public static final MultiPercolateAction INSTANCE = new MultiPercolateAction();

View File

@ -22,6 +22,7 @@ package org.elasticsearch.percolator;
import org.elasticsearch.action.Action;
import org.elasticsearch.client.ElasticsearchClient;
@Deprecated
public class PercolateAction extends Action<PercolateRequest, PercolateResponse, PercolateRequestBuilder> {
public static final PercolateAction INSTANCE = new PercolateAction();

View File

@ -22,13 +22,11 @@ package org.elasticsearch.percolator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
@ -36,115 +34,39 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import static org.apache.lucene.search.BooleanClause.Occur.FILTER;
public final class PercolateQuery extends Query implements Accountable {
final class PercolateQuery extends Query implements Accountable {
// cost of matching the query against the document, arbitrary as it would be really complex to estimate
public static final float MATCH_COST = 1000;
public static class Builder {
private final String docType;
private final QueryStore queryStore;
private final BytesReference documentSource;
private final IndexSearcher percolatorIndexSearcher;
private Query queriesMetaDataQuery;
private Query verifiedQueriesQuery = new MatchNoDocsQuery("");
private Query percolateTypeQuery;
/**
* @param docType The type of the document being percolated
* @param queryStore The lookup holding all the percolator queries as Lucene queries.
* @param documentSource The source of the document being percolated
* @param percolatorIndexSearcher The index searcher on top of the in-memory index that holds the document being percolated
*/
public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) {
this.docType = Objects.requireNonNull(docType);
this.queryStore = Objects.requireNonNull(queryStore);
this.documentSource = Objects.requireNonNull(documentSource);
this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
}
/**
* Optionally sets a query that reduces the number of queries to percolate based on extracted terms from
* the document to be percolated.
* @param extractedTermsFieldName The name of the field to get the extracted terms from
* @param extractionResultField The field to indicate for a document whether query term extraction was complete,
* partial or failed. If query extraction was complete, the MemoryIndex doesn't
*/
public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException {
// We can only skip the MemoryIndex verification when percolating a single document.
// When the document being percolated contains a nested object field then the MemoryIndex contains multiple
// documents. In this case the term query that indicates whether memory index verification can be skipped
// can incorrectly indicate that non nested queries would match, while their nested variants would not.
if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) {
this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE));
}
this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery(
percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName,
// include extractionResultField:failed, because docs with this term have no extractedTermsField
// and otherwise we would fail to return these docs. Docs that failed query term extraction
// always need to be verified by MemoryIndex:
new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED)
);
}
/**
* @param percolateTypeQuery A query that identifies all documents containing percolator queries
*/
public void setPercolateTypeQuery(Query percolateTypeQuery) {
this.percolateTypeQuery = Objects.requireNonNull(percolateTypeQuery);
}
public PercolateQuery build() {
if (percolateTypeQuery != null && queriesMetaDataQuery != null) {
throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata");
}
// The query that selects which percolator queries will be evaluated by MemoryIndex:
BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder();
if (percolateTypeQuery != null) {
queriesQuery.add(percolateTypeQuery, FILTER);
}
if (queriesMetaDataQuery != null) {
queriesQuery.add(queriesMetaDataQuery, FILTER);
}
return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher,
verifiedQueriesQuery);
}
}
private final String documentType;
private final QueryStore queryStore;
private final BytesReference documentSource;
private final Query percolatorQueriesQuery;
private final Query verifiedQueriesQuery;
private final Query candidateMatchesQuery;
private final Query verifiedMatchesQuery;
private final IndexSearcher percolatorIndexSearcher;
private PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) {
this.documentType = documentType;
this.documentSource = documentSource;
this.percolatorQueriesQuery = percolatorQueriesQuery;
this.queryStore = queryStore;
this.percolatorIndexSearcher = percolatorIndexSearcher;
this.verifiedQueriesQuery = verifiedQueriesQuery;
PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
Query candidateMatchesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedMatchesQuery) {
this.documentType = Objects.requireNonNull(documentType);
this.documentSource = Objects.requireNonNull(documentSource);
this.candidateMatchesQuery = Objects.requireNonNull(candidateMatchesQuery);
this.queryStore = Objects.requireNonNull(queryStore);
this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
this.verifiedMatchesQuery = Objects.requireNonNull(verifiedMatchesQuery);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = percolatorQueriesQuery.rewrite(reader);
if (rewritten != percolatorQueriesQuery) {
Query rewritten = candidateMatchesQuery.rewrite(reader);
if (rewritten != candidateMatchesQuery) {
return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher,
verifiedQueriesQuery);
verifiedMatchesQuery);
} else {
return this;
}
@ -152,8 +74,8 @@ public final class PercolateQuery extends Query implements Accountable {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false);
final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores);
final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, false);
final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, false);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> set) {
@ -183,17 +105,17 @@ public final class PercolateQuery extends Query implements Accountable {
@Override
public float getValueForNormalization() throws IOException {
return innerWeight.getValueForNormalization();
return candidateMatchesWeight.getValueForNormalization();
}
@Override
public void normalize(float v, float v1) {
innerWeight.normalize(v, v1);
candidateMatchesWeight.normalize(v, v1);
}
@Override
public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException {
final Scorer approximation = innerWeight.scorer(leafReaderContext);
final Scorer approximation = candidateMatchesWeight.scorer(leafReaderContext);
if (approximation == null) {
return null;
}
@ -226,7 +148,7 @@ public final class PercolateQuery extends Query implements Accountable {
}
};
} else {
Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext);
Scorer verifiedDocsScorer = verifiedMatchesWeight.scorer(leafReaderContext);
Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
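// Docs flagged as verified matches matched on all their extracted terms, so for them the
// candidate match is definitive and the expensive MemoryIndex verification can be skipped.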
return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {
@ -293,7 +215,7 @@ public final class PercolateQuery extends Query implements Accountable {
@Override
public String toString(String s) {
return "PercolateQuery{document_type={" + documentType + "},document_source={" + documentSource.utf8ToString() +
"},inner={" + percolatorQueriesQuery.toString(s) + "}}";
"},inner={" + candidateMatchesQuery.toString(s) + "}}";
}
@Override

View File

@ -50,6 +50,7 @@ import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.XContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -57,7 +58,6 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.FieldNameAnalyzer;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperForType;
@ -406,37 +406,27 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
docSearcher.setQueryCache(null);
}
IndexSettings indexSettings = context.getIndexSettings();
boolean mapUnmappedFieldsAsString = indexSettings.getValue(PercolatorFieldMapper.INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING);
return buildQuery(indexSettings.getIndexVersionCreated(), context, docSearcher, mapUnmappedFieldsAsString);
}
Query buildQuery(Version indexVersionCreated, QueryShardContext context, IndexSearcher docSearcher,
boolean mapUnmappedFieldsAsString) throws IOException {
Version indexVersionCreated = context.getIndexSettings().getIndexVersionCreated();
boolean mapUnmappedFieldsAsString = context.getIndexSettings()
.getValue(PercolatorFieldMapper.INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING);
if (indexVersionCreated.onOrAfter(Version.V_5_0_0_alpha1)) {
MappedFieldType fieldType = context.fieldMapper(field);
if (fieldType == null) {
throw new QueryShardException(context, "field [" + field + "] does not exist");
}
if (!(fieldType instanceof PercolatorFieldMapper.PercolatorFieldType)) {
if (!(fieldType instanceof PercolatorFieldMapper.FieldType)) {
throw new QueryShardException(context, "expected field [" + field +
"] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
}
PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
PercolatorFieldMapper.FieldType pft = (PercolatorFieldMapper.FieldType) fieldType;
PercolateQuery.QueryStore queryStore = createStore(pft, context, mapUnmappedFieldsAsString);
PercolateQuery.Builder builder = new PercolateQuery.Builder(
documentType, queryStore, document, docSearcher
);
builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getExtractionResultFieldName());
return builder.build();
return pft.percolateQuery(documentType, queryStore, document, docSearcher);
} else {
Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, MapperService.PERCOLATOR_LEGACY_TYPE_NAME));
PercolateQuery.Builder builder = new PercolateQuery.Builder(
documentType, createLegacyStore(context, mapUnmappedFieldsAsString), document, docSearcher
);
builder.setPercolateTypeQuery(percolateTypeQuery);
return builder.build();
PercolateQuery.QueryStore queryStore = createLegacyStore(context, mapUnmappedFieldsAsString);
return new PercolateQuery(documentType, queryStore, document, percolateTypeQuery, docSearcher,
new MatchNoDocsQuery("pre 5.0.0-alpha1 index, no verified matches"));
}
}
@ -477,17 +467,17 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
}
}
private static PercolateQuery.QueryStore createStore(PercolatorFieldMapper.PercolatorFieldType fieldType,
private static PercolateQuery.QueryStore createStore(PercolatorFieldMapper.FieldType fieldType,
QueryShardContext context,
boolean mapUnmappedFieldsAsString) {
return ctx -> {
LeafReader leafReader = ctx.reader();
BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues(fieldType.getQueryBuilderFieldName());
BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues(fieldType.queryBuilderField.name());
if (binaryDocValues == null) {
return docId -> null;
}
Bits bits = leafReader.getDocsWithField(fieldType.getQueryBuilderFieldName());
Bits bits = leafReader.getDocsWithField(fieldType.queryBuilderField.name());
return docId -> {
if (bits.get(docId)) {
BytesRef qbSource = binaryDocValues.get(docId);

View File

@ -20,10 +20,22 @@ package org.elasticsearch.percolator;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -38,12 +50,18 @@ import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.core.BinaryFieldMapper;
import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.BoostingQueryBuilder;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
@ -56,7 +74,12 @@ public class PercolatorFieldMapper extends FieldMapper {
public static final Setting<Boolean> INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING =
Setting.boolSetting("index.percolator.map_unmapped_fields_as_string", false, Setting.Property.IndexScope);
public static final String CONTENT_TYPE = "percolator";
private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType();
private static final FieldType FIELD_TYPE = new FieldType();
static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
static final String EXTRACTION_COMPLETE = "complete";
static final String EXTRACTION_PARTIAL = "partial";
static final String EXTRACTION_FAILED = "failed";
public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
@ -74,12 +97,13 @@ public class PercolatorFieldMapper extends FieldMapper {
@Override
public PercolatorFieldMapper build(BuilderContext context) {
context.path().add(name());
FieldType fieldType = (FieldType) this.fieldType;
KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType();
fieldType.queryTermsField = extractedTermsField.fieldType();
KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType();
fieldType.extractionResultField = extractionResultField.fieldType();
BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType();
fieldType.queryBuilderField = queryBuilderField.fieldType();
context.path().remove();
setupFieldType(context);
return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
@ -114,40 +138,28 @@ public class PercolatorFieldMapper extends FieldMapper {
}
}
public static class PercolatorFieldType extends MappedFieldType {
public static class FieldType extends MappedFieldType {
private MappedFieldType queryTermsField;
private MappedFieldType extractionResultField;
private MappedFieldType queryBuilderField;
MappedFieldType queryTermsField;
MappedFieldType extractionResultField;
MappedFieldType queryBuilderField;
public PercolatorFieldType() {
public FieldType() {
setIndexOptions(IndexOptions.NONE);
setDocValuesType(DocValuesType.NONE);
setStored(false);
}
public PercolatorFieldType(PercolatorFieldType ref) {
public FieldType(FieldType ref) {
super(ref);
queryTermsField = ref.queryTermsField;
extractionResultField = ref.extractionResultField;
queryBuilderField = ref.queryBuilderField;
}
public String getExtractedTermsField() {
return queryTermsField.name();
}
public String getExtractionResultFieldName() {
return extractionResultField.name();
}
public String getQueryBuilderFieldName() {
return queryBuilderField.name();
}
@Override
public MappedFieldType clone() {
return new PercolatorFieldType(this);
return new FieldType(this);
}
@Override
@ -159,6 +171,52 @@ public class PercolatorFieldMapper extends FieldMapper {
public Query termQuery(Object value, QueryShardContext context) {
throw new QueryShardException(context, "Percolator fields are not searchable directly, use a percolate query instead");
}
public Query percolateQuery(String documentType, PercolateQuery.QueryStore queryStore, BytesReference documentSource,
IndexSearcher searcher) throws IOException {
IndexReader indexReader = searcher.getIndexReader();
Query candidateMatchesQuery = createCandidateQuery(indexReader);
Query verifiedMatchesQuery;
// We can only skip the MemoryIndex verification when percolating a single document.
// When the document being percolated contains a nested object field then the MemoryIndex contains multiple
// documents. In this case the term query that indicates whether memory index verification can be skipped
// can incorrectly indicate that non-nested queries would match, while their nested variants would not.
if (indexReader.maxDoc() == 1) {
verifiedMatchesQuery = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_COMPLETE));
} else {
verifiedMatchesQuery = new MatchNoDocsQuery("nested docs, so no verified matches");
}
return new PercolateQuery(documentType, queryStore, documentSource, candidateMatchesQuery, searcher, verifiedMatchesQuery);
}
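// Builds a disjunction over every field:term combination that occurs in the document being
// percolated, encoded as field + NUL + term in the extracted terms field. The
// extraction_result:failed term is included so that queries whose terms could not be
// extracted are always evaluated against the MemoryIndex.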
Query createCandidateQuery(IndexReader indexReader) throws IOException {
List<Term> extractedTerms = new ArrayList<>();
// include extractionResultField:failed, because docs with this term have no extractedTermsField
// and otherwise we would fail to return these docs. Docs that failed query term extraction
// always need to be verified by MemoryIndex:
extractedTerms.add(new Term(extractionResultField.name(), EXTRACTION_FAILED));
LeafReader reader = indexReader.leaves().get(0).reader();
Fields fields = reader.fields();
for (String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
BytesRef fieldBr = new BytesRef(field);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(new Term(queryTermsField.name(), builder.toBytesRef()));
}
}
return new TermsQuery(extractedTerms);
}
}
private final boolean mapUnmappedFieldAsString;
@ -211,6 +269,7 @@ public class PercolatorFieldMapper extends FieldMapper {
XContentParser parser = context.parser();
QueryBuilder queryBuilder = parseQueryBuilder(queryShardContext.newParseContext(parser), parser.getTokenLocation());
verifyRangeQueries(queryBuilder);
// Fetching of terms, shapes and indexed scripts happen during this rewrite:
queryBuilder = queryBuilder.rewrite(queryShardContext);
@ -222,11 +281,34 @@ public class PercolatorFieldMapper extends FieldMapper {
}
Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder);
ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(),
queryTermsField.fieldType());
processQuery(query, context);
return null;
}
void processQuery(Query query, ParseContext context) {
ParseContext.Document doc = context.doc();
FieldType pft = (FieldType) this.fieldType();
QueryAnalyzer.Result result;
try {
result = QueryAnalyzer.analyze(query);
} catch (QueryAnalyzer.UnsupportedQueryException e) {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
return;
}
for (Term term : result.terms) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
}
if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
} else {
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
}
}
public static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException {
return toQuery(context, mapUnmappedFieldsAsString, parseQueryBuilder(context.newParseContext(parser), parser.getTokenLocation()));
}
@ -273,4 +355,38 @@ public class PercolatorFieldMapper extends FieldMapper {
return CONTENT_TYPE;
}
/**
* Fails if a range query with a date range is found based on current time
*/
static void verifyRangeQueries(QueryBuilder queryBuilder) {
if (queryBuilder instanceof RangeQueryBuilder) {
RangeQueryBuilder rangeQueryBuilder = (RangeQueryBuilder) queryBuilder;
if (rangeQueryBuilder.from() instanceof String) {
String from = (String) rangeQueryBuilder.from();
String to = (String) rangeQueryBuilder.to();
if (from.contains("now") || to.contains("now")) {
throw new IllegalArgumentException("Percolator queries containing time range queries based on the " +
"current time are forbidden");
}
}
} else if (queryBuilder instanceof BoolQueryBuilder) {
BoolQueryBuilder boolQueryBuilder = (BoolQueryBuilder) queryBuilder;
List<QueryBuilder> clauses = new ArrayList<>();
clauses.addAll(boolQueryBuilder.filter());
clauses.addAll(boolQueryBuilder.must());
clauses.addAll(boolQueryBuilder.mustNot());
clauses.addAll(boolQueryBuilder.should());
for (QueryBuilder clause : clauses) {
verifyRangeQueries(clause);
}
} else if (queryBuilder instanceof ConstantScoreQueryBuilder) {
verifyRangeQueries(((ConstantScoreQueryBuilder) queryBuilder).innerQuery());
} else if (queryBuilder instanceof FunctionScoreQueryBuilder) {
verifyRangeQueries(((FunctionScoreQueryBuilder) queryBuilder).query());
} else if (queryBuilder instanceof BoostingQueryBuilder) {
verifyRangeQueries(((BoostingQueryBuilder) queryBuilder).negativeQuery());
verifyRangeQueries(((BoostingQueryBuilder) queryBuilder).positiveQuery());
}
}
}
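A hedged sketch of how the new index-time check fires (test-style code; `expectThrows` from the test framework and same-package access to `verifyRangeQueries` are assumed):

```java
// verifyRangeQueries recursively unwraps bool, constant_score, function_score and
// boosting queries, and rejects any string range whose bounds contain "now":
QueryBuilder rejected = QueryBuilders.boolQuery()
        .filter(QueryBuilders.rangeQuery("created_at").from("now-1h").to("now"));
expectThrows(IllegalArgumentException.class,
        () -> PercolatorFieldMapper.verifyRangeQueries(rejected));

// A fixed date range is still accepted:
PercolatorFieldMapper.verifyRangeQueries(
        QueryBuilders.rangeQuery("created_at").from("2016-01-01").to("2016-07-01"));
```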

View File

@ -18,15 +18,8 @@
*/
package org.elasticsearch.percolator;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.TermsQuery;
@ -46,37 +39,25 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.index.mapper.ParseContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
/**
* Utility to extract query terms from queries and create queries from documents.
*/
public final class ExtractQueryTermsService {
public final class QueryAnalyzer {
private static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
public static final String EXTRACTION_COMPLETE = "complete";
public static final String EXTRACTION_PARTIAL = "partial";
public static final String EXTRACTION_FAILED = "failed";
static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
private static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
static {
Map<Class<? extends Query>, Function<Query, Result>> map = new HashMap<>(17);
Map<Class<? extends Query>, Function<Query, Result>> map = new HashMap<>();
map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
map.put(ConstantScoreQuery.class, constantScoreQuery());
map.put(BoostQuery.class, boostQuery());
@ -97,83 +78,34 @@ public final class ExtractQueryTermsService {
queryProcessors = Collections.unmodifiableMap(map);
}
private ExtractQueryTermsService() {
private QueryAnalyzer() {
}
/**
* Extracts all terms from the specified query and adds it to the specified document.
* Extracts terms from the provided query. These terms are stored with the percolator query and
* used by the percolate query's candidate query as fields to query by. The candidate query
* holds the terms from the document to be percolated and allows the percolate query to ignore
* percolator queries that we know would otherwise never match.
*
* @param query The query to extract terms from
* @param document The document to add the extracted terms to
* @param queryTermsFieldField The field in the document holding the extracted terms
* @param extractionResultField The field contains whether query term extraction was successful, partial or
* failed. (For example the query contained an unsupported query (e.g. WildcardQuery)
* then query extraction would fail)
* @param fieldType The field type for the query metadata field
*/
public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField,
String extractionResultField, FieldType fieldType) {
Result result;
try {
result = extractQueryTerms(query);
} catch (UnsupportedQueryException e) {
document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType));
return;
}
for (Term term : result.terms) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes());
document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType));
}
if (result.verified) {
document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType));
} else {
document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType));
}
}
/**
* Creates a terms query containing all terms from all fields of the specified index reader.
*/
public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
Term... optionalTerms) throws IOException {
Objects.requireNonNull(queryMetadataField);
List<Term> extractedTerms = new ArrayList<>();
Collections.addAll(extractedTerms, optionalTerms);
Fields fields = MultiFields.getFields(indexReader);
for (String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
BytesRef fieldBr = new BytesRef(field);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
}
}
return new TermsQuery(extractedTerms);
}
/**
* Extracts all query terms from the provided query and adds it to specified list.
* <p>
* From boolean query with no should clauses or phrase queries only the longest term are selected,
* When extracting the terms for the specified query, we can also determine if the percolator query is
* always going to match. For example if a percolator query just contains a term query or a disjunction
* query then when the candidate query matches with that, we know the entire percolator query always
* matches. This allows the percolate query to skip the expensive memory index verification step that
* it would otherwise have to execute (for example when a percolator query contains a phrase query or a
* conjunction query).
*
* <p>
* The query analyzer doesn't always extract all terms from the specified query. For example, from a
* boolean query with no should clauses or from phrase queries only the longest terms are selected,
* since those terms are likely to be the rarest. A boolean query's must_not clauses are always ignored.
*
* <p>
* If from part of the query, no query terms can be extracted then term extraction is stopped and
* an UnsupportedQueryException is thrown.
Sometimes the query analyzer can't extract terms from a sub query; if that happens then query
analysis is stopped and an UnsupportedQueryException is thrown, so that the caller can mark
this query in such a way that the PercolateQuery always verifies it with the MemoryIndex.
*/
static Result extractQueryTerms(Query query) {
public static Result analyze(Query query) {
Class queryClass = query.getClass();
if (queryClass.isAnonymousClass()) {
// Sometimes queries have anonymous classes in that case we need the direct super class.
@ -195,14 +127,14 @@ public final class ExtractQueryTermsService {
static Function<Query, Result> constantScoreQuery() {
return query -> {
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
return extractQueryTerms(wrappedQuery);
return analyze(wrappedQuery);
};
}
static Function<Query, Result> boostQuery() {
return query -> {
Query wrappedQuery = ((BoostQuery) query).getQuery();
return extractQueryTerms(wrappedQuery);
return analyze(wrappedQuery);
};
}
@ -277,7 +209,7 @@ public final class ExtractQueryTermsService {
Set<Term> bestClauses = null;
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
for (SpanQuery clause : spanNearQuery.getClauses()) {
Result temp = extractQueryTerms(clause);
Result temp = analyze(clause);
bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
}
return new Result(false, bestClauses);
@ -289,7 +221,7 @@ public final class ExtractQueryTermsService {
Set<Term> terms = new HashSet<>();
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
for (SpanQuery clause : spanOrQuery.getClauses()) {
terms.addAll(extractQueryTerms(clause).terms);
terms.addAll(analyze(clause).terms);
}
return new Result(false, terms);
};
@ -297,14 +229,14 @@ public final class ExtractQueryTermsService {
static Function<Query, Result> spanNotQuery() {
return query -> {
Result result = extractQueryTerms(((SpanNotQuery) query).getInclude());
Result result = analyze(((SpanNotQuery) query).getInclude());
return new Result(false, result.terms);
};
}
static Function<Query, Result> spanFirstQuery() {
return query -> {
Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch());
Result result = analyze(((SpanFirstQuery) query).getMatch());
return new Result(false, result.terms);
};
}
@ -341,7 +273,7 @@ public final class ExtractQueryTermsService {
Result temp;
try {
temp = extractQueryTerms(clause.getQuery());
temp = analyze(clause.getQuery());
} catch (UnsupportedQueryException e) {
uqe = e;
continue;
@ -381,7 +313,7 @@ public final class ExtractQueryTermsService {
static Function<Query, Result> functionScoreQuery() {
return query -> {
FunctionScoreQuery functionScoreQuery = (FunctionScoreQuery) query;
Result result = extractQueryTerms(functionScoreQuery.getSubQuery());
Result result = analyze(functionScoreQuery.getSubQuery());
// If min_score is specified we can't guarantee upfront that this percolator query matches,
// so in that case we set verified to false.
// (if it matches, the percolated document matches with the extracted terms.
@ -395,7 +327,7 @@ public final class ExtractQueryTermsService {
boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
Set<Term> terms = new HashSet<>();
for (Query disjunct : disjunctions) {
Result subResult = extractQueryTerms(disjunct);
Result subResult = analyze(disjunct);
if (subResult.verified == false) {
verified = false;
}
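To illustrate the `verified` flag this analysis produces (assertions are illustrative, based on the query processors in this class):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;

// A pure disjunction of term queries is verified: if the candidate query
// matches, the percolator query is guaranteed to match as well.
BooleanQuery disjunction = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD)
        .add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD)
        .build();
assert QueryAnalyzer.analyze(disjunction).verified;

// A phrase query only contributes its longest term, so a candidate match is not
// definitive and the MemoryIndex verification step still has to run.
PhraseQuery phrase = new PhraseQuery("field", "lazy", "dog");
assert QueryAnalyzer.analyze(phrase).verified == false;
```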

View File

@ -33,6 +33,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener;
import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
@Deprecated
public class RestMultiPercolateAction extends BaseRestHandler {
private final boolean allowExplicitIndex;

View File

@ -35,6 +35,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener;
import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
@Deprecated
public class RestPercolateAction extends BaseRestHandler {
@Inject
public RestPercolateAction(Settings settings, RestController controller) {

View File

@ -49,6 +49,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Deprecated
public class TransportMultiPercolateAction extends HandledTransportAction<MultiPercolateRequest, MultiPercolateResponse> {
private final Client client;

View File

@ -57,6 +57,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@Deprecated
public class TransportPercolateAction extends HandledTransportAction<PercolateRequest, PercolateResponse> {
private final Client client;

View File

@ -0,0 +1,436 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.After;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import static org.hamcrest.Matchers.equalTo;
public class CandidateQueryTests extends ESSingleNodeTestCase {
private Directory directory;
private IndexWriter indexWriter;
private DocumentMapper documentMapper;
private DirectoryReader directoryReader;
private MapperService mapperService;
private PercolatorFieldMapper fieldMapper;
private PercolatorFieldMapper.FieldType fieldType;
private List<Query> queries;
private PercolateQuery.QueryStore queryStore;
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Collections.singleton(PercolatorPlugin.class);
}
@Before
public void init() throws Exception {
directory = newDirectory();
IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
config.setMergePolicy(NoMergePolicy.INSTANCE);
indexWriter = new IndexWriter(directory, config);
String indexName = "test";
IndexService indexService = createIndex(indexName, Settings.EMPTY);
mapperService = indexService.mapperService();
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("int_field").field("type", "integer").endObject()
.startObject("long_field").field("type", "long").endObject()
.startObject("half_float_field").field("type", "half_float").endObject()
.startObject("float_field").field("type", "float").endObject()
.startObject("double_field").field("type", "double").endObject()
.startObject("ip_field").field("type", "ip").endObject()
.startObject("field").field("type", "keyword").endObject()
.endObject().endObject().endObject().string();
documentMapper = mapperService.merge("type", new CompressedXContent(mapper), MapperService.MergeReason.MAPPING_UPDATE, true);
String queryField = "query_field";
String mappingType = "query";
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(mappingType)
.startObject("properties").startObject(queryField).field("type", "percolator").endObject().endObject()
.endObject().endObject().string();
mapperService.merge(mappingType, new CompressedXContent(percolatorMapper), MapperService.MergeReason.MAPPING_UPDATE, true);
fieldMapper = (PercolatorFieldMapper) mapperService.documentMapper(mappingType).mappers().getMapper(queryField);
fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType();
queries = new ArrayList<>();
queryStore = ctx -> docId -> this.queries.get(docId);
}
@After
public void deinit() throws Exception {
directoryReader.close();
directory.close();
}
public void testDuel() throws Exception {
List<Function<String, Query>> queryFunctions = new ArrayList<>();
queryFunctions.add((id) -> new PrefixQuery(new Term("field", id)));
queryFunctions.add((id) -> new WildcardQuery(new Term("field", id + "*")));
queryFunctions.add((id) -> new CustomQuery(new Term("field", id)));
queryFunctions.add((id) -> new SpanTermQuery(new Term("field", id)));
queryFunctions.add((id) -> new TermQuery(new Term("field", id)));
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
return builder.build();
});
queryFunctions.add((id) -> new MatchAllDocsQuery());
queryFunctions.add((id) -> new MatchNoDocsQuery("no reason at all"));
int numDocs = randomIntBetween(queryFunctions.size(), queryFunctions.size() * 3);
List<ParseContext.Document> documents = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
Query query = queryFunctions.get(i % queryFunctions.size()).apply(id);
addQuery(query, documents);
}
indexWriter.addDocuments(documents);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
Iterable<? extends IndexableField> doc = Collections.singleton(new StringField("field", id, Field.Store.NO));
MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
}
Iterable<? extends IndexableField> doc = Collections.singleton(new StringField("field", "value", Field.Store.NO));
MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
// Empty percolator doc:
memoryIndex = new MemoryIndex();
duelRun(queryStore, memoryIndex, shardSearcher);
}
public void testDuelSpecificQueries() throws Exception {
List<ParseContext.Document> documents = new ArrayList<>();
CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
commonTermsQuery.add(new Term("field", "quick"));
commonTermsQuery.add(new Term("field", "brown"));
commonTermsQuery.add(new Term("field", "fox"));
addQuery(commonTermsQuery, documents);
BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery(new Term[]{new Term("field", "quick"),
new Term("field", "brown"), new Term("field", "fox")}, false);
addQuery(blendedTermQuery, documents);
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true)
.addClause(new SpanTermQuery(new Term("field", "quick")))
.addClause(new SpanTermQuery(new Term("field", "brown")))
.addClause(new SpanTermQuery(new Term("field", "fox")))
.build();
addQuery(spanNearQuery, documents);
SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true)
.addClause(new SpanTermQuery(new Term("field", "the")))
.addClause(new SpanTermQuery(new Term("field", "lazy")))
.addClause(new SpanTermQuery(new Term("field", "doc")))
.build();
SpanOrQuery spanOrQuery = new SpanOrQuery(
spanNearQuery,
spanNearQuery2
);
addQuery(spanOrQuery, documents);
SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery);
addQuery(spanNotQuery, documents);
long lowerLong = randomIntBetween(0, 256);
long upperLong = lowerLong + randomIntBetween(0, 32);
addQuery(LongPoint.newRangeQuery("long_field", lowerLong, upperLong), documents);
indexWriter.addDocuments(documents);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
Document document = new Document();
document.add(new TextField("field", "the quick brown fox jumps over the lazy dog", Field.Store.NO));
long randomLong = randomIntBetween((int) lowerLong, (int) upperLong);
document.add(new LongPoint("long_field", randomLong));
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
}
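// duelRun asserts that the optimized PercolateQuery (candidate query plus verified
// matches) produces exactly the same hits, scores and explanations as the brute-force
// ControlQuery below, which evaluates every stored query against the MemoryIndex.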
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
boolean requireScore = randomBoolean();
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
Query percolateQuery = fieldType.percolateQuery("type", queryStore, new BytesArray("{}"), percolateSearcher);
Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery);
TopDocs topDocs = shardSearcher.search(query, 10);
Query controlQuery = new ControlQuery(memoryIndex, queryStore);
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
for (int j = 0; j < topDocs.scoreDocs.length; j++) {
assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
if (requireScore) {
Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
assertThat(explain1.getValue(), equalTo(explain2.getValue()));
}
}
}
private void addQuery(Query query, List<ParseContext.Document> docs) throws IOException {
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
mapperService.documentMapperParser(), documentMapper, null, null);
fieldMapper.processQuery(query, parseContext);
docs.add(parseContext.doc());
queries.add(query);
}
private static final class CustomQuery extends Query {
private final Term term;
private CustomQuery(Term term) {
this.term = term;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
return new TermQuery(term);
}
@Override
public String toString(String field) {
return "custom{" + field + "}";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return classHash();
}
}
private static final class ControlQuery extends Query {
private final MemoryIndex memoryIndex;
private final PercolateQuery.QueryStore queryStore;
private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
this.memoryIndex = memoryIndex;
this.queryStore = queryStore;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
return new ConstantScoreWeight(this) {
float _score;
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
int result = scorer.iterator().advance(doc);
if (result == doc) {
return Explanation.match(scorer.score(), "ControlQuery");
}
}
return Explanation.noMatch("ControlQuery");
}
@Override
public String toString() {
return "weight(" + ControlQuery.this + ")";
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
@Override
protected boolean match(int doc) {
try {
Query query = leaf.getQuery(doc);
float score = memoryIndex.search(query);
if (score != 0f) {
if (needsScores) {
_score = score;
}
return true;
} else {
return false;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
@Override
public float score() throws IOException {
return _score;
}
};
}
};
}
@Override
public String toString(String field) {
return "control{" + field + "}";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return classHash();
}
}
}

View File

@ -21,60 +21,36 @@ package org.elasticsearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.test.ESTestCase;
import org.junit.After;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static org.hamcrest.Matchers.arrayWithSize;
import static org.hamcrest.Matchers.equalTo;
@@ -82,34 +58,13 @@ import static org.hamcrest.Matchers.is;
public class PercolateQueryTests extends ESTestCase {
public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
public static final FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
static {
EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false);
EXTRACTED_TERMS_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
EXTRACTED_TERMS_FIELD_TYPE.freeze();
}
private Directory directory;
private IndexWriter indexWriter;
private Map<String, Query> queries;
private PercolateQuery.QueryStore queryStore;
private DirectoryReader directoryReader;
@Before
public void init() throws Exception {
directory = newDirectory();
queries = new HashMap<>();
queryStore = ctx -> docId -> {
try {
String val = ctx.reader().document(docId).get(UidFieldMapper.NAME);
return queries.get(Uid.createUid(val).id());
} catch (IOException e) {
throw new RuntimeException(e);
}
};
IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
config.setMergePolicy(NoMergePolicy.INSTANCE);
indexWriter = new IndexWriter(directory, config);
@@ -121,31 +76,38 @@ public class PercolateQueryTests extends ESTestCase {
directory.close();
}
public void testVariousQueries() throws Exception {
addPercolatorQuery("1", new TermQuery(new Term("field", "brown")));
addPercolatorQuery("2", new TermQuery(new Term("field", "monkey")));
addPercolatorQuery("3", new TermQuery(new Term("field", "fox")));
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD);
bq1.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD);
addPercolatorQuery("4", bq1.build());
BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
bq2.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
bq2.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST);
addPercolatorQuery("5", bq2.build());
BooleanQuery.Builder bq3 = new BooleanQuery.Builder();
bq3.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
bq3.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT);
addPercolatorQuery("6", bq3.build());
BooleanQuery.Builder bq4 = new BooleanQuery.Builder();
bq4.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT);
bq4.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST);
addPercolatorQuery("7", bq4.build());
PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
pq1.add(new Term("field", "lazy"));
pq1.add(new Term("field", "dog"));
addPercolatorQuery("8", pq1.build());
public void testPercolateQuery() throws Exception {
List<Iterable<? extends IndexableField>> docs = new ArrayList<>();
List<Query> queries = new ArrayList<>();
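        // Query store backed by the list above: the Lucene doc id doubles as an index into it.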
PercolateQuery.QueryStore queryStore = ctx -> queries::get;
queries.add(new TermQuery(new Term("field", "fox")));
docs.add(Collections.singleton(new StringField("select", "a", Field.Store.NO)));
SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
snp.addClause(new SpanTermQuery(new Term("field", "dog")));
snp.setSlop(2);
queries.add(snp.build());
docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
pq1.add(new Term("field", "quick"));
pq1.add(new Term("field", "brown"));
pq1.add(new Term("field", "jumps"));
pq1.setSlop(1);
queries.add(pq1.build());
docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
queries.add(bq1.build());
docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
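        // Each stored query gets a companion document whose "select" field is later used as the
        // candidate query, so only the chosen subset of stored queries is evaluated.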
indexWriter.addDocuments(docs);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
@@ -153,26 +115,26 @@ public class PercolateQueryTests extends ESTestCase {
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery.Builder builder = new PercolateQuery.Builder(
"docType",
queryStore,
new BytesArray("{}"),
percolateSearcher
);
builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
// no scoring, wrapping it in a constant score query:
Query query = new ConstantScoreQuery(builder.build());
Query query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("a"),
new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery("")));
TopDocs topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(5));
assertThat(topDocs.scoreDocs.length, equalTo(5));
assertThat(topDocs.totalHits, equalTo(1));
assertThat(topDocs.scoreDocs.length, equalTo(1));
assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
Explanation explanation = shardSearcher.explain(query, 0);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"),
new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery("")));
topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(3));
assertThat(topDocs.scoreDocs.length, equalTo(3));
assertThat(topDocs.scoreDocs[0].doc, equalTo(1));
explanation = shardSearcher.explain(query, 1);
assertThat(explanation.isMatch(), is(false));
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
explanation = shardSearcher.explain(query, 2);
@@ -180,371 +142,37 @@ public class PercolateQueryTests extends ESTestCase {
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
explanation = shardSearcher.explain(query, 3);
explanation = shardSearcher.explain(query, 2);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
explanation = shardSearcher.explain(query, 4);
assertThat(explanation.isMatch(), is(false));
query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"),
new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery()));
topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(4));
assertThat(topDocs.scoreDocs[3].doc, equalTo(5));
explanation = shardSearcher.explain(query, 5);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[3].score));
explanation = shardSearcher.explain(query, 6);
assertThat(explanation.isMatch(), is(false));
assertThat(topDocs.scoreDocs[4].doc, equalTo(7));
explanation = shardSearcher.explain(query, 7);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[4].score));
}
public void testVariousQueries_withScoring() throws Exception {
SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
snp.addClause(new SpanTermQuery(new Term("field", "dog")));
snp.setSlop(2);
addPercolatorQuery("1", snp.build());
PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
pq1.add(new Term("field", "quick"));
pq1.add(new Term("field", "brown"));
pq1.add(new Term("field", "jumps"));
pq1.setSlop(1);
addPercolatorQuery("2", pq1.build());
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
addPercolatorQuery("3", bq1.build());
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery.Builder builder = new PercolateQuery.Builder(
"docType",
queryStore,
new BytesArray("{}"),
percolateSearcher
);
builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
Query query = builder.build();
TopDocs topDocs = shardSearcher.search(query, 10);
query = new PercolateQuery("type", queryStore, new BytesArray("{}"), new TermQuery(new Term("select", "b")),
percolateSearcher, new MatchNoDocsQuery(""));
topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(3));
assertThat(topDocs.scoreDocs[0].doc, equalTo(2));
Explanation explanation = shardSearcher.explain(query, 2);
assertThat(topDocs.scoreDocs.length, equalTo(3));
assertThat(topDocs.scoreDocs[0].doc, equalTo(3));
explanation = shardSearcher.explain(query, 3);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
assertThat(explanation.getDetails(), arrayWithSize(1));
assertThat(topDocs.scoreDocs[1].doc, equalTo(1));
explanation = shardSearcher.explain(query, 1);
assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
explanation = shardSearcher.explain(query, 2);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
assertThat(explanation.getDetails(), arrayWithSize(1));
assertThat(topDocs.scoreDocs[2].doc, equalTo(0));
explanation = shardSearcher.explain(query, 0);
assertThat(topDocs.scoreDocs[2].doc, equalTo(1));
explanation = shardSearcher.explain(query, 1);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
assertThat(explanation.getDetails(), arrayWithSize(1));
}
public void testDuel() throws Exception {
List<Function<String, Query>> queries = new ArrayList<>();
queries.add((id) -> new PrefixQuery(new Term("field", id)));
queries.add((id) -> new WildcardQuery(new Term("field", id + "*")));
queries.add((id) -> new CustomQuery(new Term("field", id)));
queries.add((id) -> new SpanTermQuery(new Term("field", id)));
queries.add((id) -> new TermQuery(new Term("field", id)));
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
return builder.build();
});
queries.add((id) -> new MatchAllDocsQuery());
queries.add((id) -> new MatchNoDocsQuery("no reason at all"));
int numDocs = randomIntBetween(queries.size(), queries.size() * 3);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
addPercolatorQuery(id, queries.get(i % queries.size()).apply(id));
}
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", id, new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
}
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
// Empty percolator doc:
memoryIndex = new MemoryIndex();
duelRun(memoryIndex, shardSearcher);
}
public void testDuelSpecificQueries() throws Exception {
CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
commonTermsQuery.add(new Term("field", "quick"));
commonTermsQuery.add(new Term("field", "brown"));
commonTermsQuery.add(new Term("field", "fox"));
addPercolatorQuery("_id1", commonTermsQuery);
BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery(new Term[]{new Term("field", "quick"),
new Term("field", "brown"), new Term("field", "fox")}, false);
addPercolatorQuery("_id2", blendedTermQuery);
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true)
.addClause(new SpanTermQuery(new Term("field", "quick")))
.addClause(new SpanTermQuery(new Term("field", "brown")))
.addClause(new SpanTermQuery(new Term("field", "fox")))
.build();
addPercolatorQuery("_id3", spanNearQuery);
SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true)
.addClause(new SpanTermQuery(new Term("field", "the")))
.addClause(new SpanTermQuery(new Term("field", "lazy")))
.addClause(new SpanTermQuery(new Term("field", "doc")))
.build();
SpanOrQuery spanOrQuery = new SpanOrQuery(
spanNearQuery,
spanNearQuery2
);
addPercolatorQuery("_id4", spanOrQuery);
SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery);
addPercolatorQuery("_id5", spanNotQuery);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
}
void addPercolatorQuery(String id, Query query, String... extraFields) throws IOException {
queries.put(id, query);
ParseContext.Document document = new ParseContext.Document();
ExtractQueryTermsService.extractQueryTerms(query, document, EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME,
EXTRACTED_TERMS_FIELD_TYPE);
document.add(new StoredField(UidFieldMapper.NAME, Uid.createUid(MapperService.PERCOLATOR_LEGACY_TYPE_NAME, id)));
assert extraFields.length % 2 == 0;
for (int i = 0; i < extraFields.length; i++) {
document.add(new StringField(extraFields[i], extraFields[++i], Field.Store.NO));
}
indexWriter.addDocument(document);
}
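    // Runs the optimized percolate query and the brute-force ControlQuery side by side;
    // hit counts, doc order and (when scoring) scores must match exactly.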
private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
boolean requireScore = randomBoolean();
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery.Builder builder = new PercolateQuery.Builder(
"docType",
queryStore,
new BytesArray("{}"),
percolateSearcher
);
// enables the optimization that prevents queries from being evaluated that don't match
builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build());
TopDocs topDocs = shardSearcher.search(query, 10);
Query controlQuery = new ControlQuery(memoryIndex, queryStore);
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
for (int j = 0; j < topDocs.scoreDocs.length; j++) {
assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
if (requireScore) {
Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
assertThat(explain1.getValue(), equalTo(explain2.getValue()));
}
}
}
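    // A query that only reveals itself as a TermQuery at rewrite time; exercises
    // term extraction for query implementations the extractor doesn't know about.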
private static final class CustomQuery extends Query {
private final Term term;
private CustomQuery(Term term) {
this.term = term;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
return new TermQuery(term);
}
@Override
public String toString(String field) {
return "custom{" + field + "}";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return classHash();
}
}
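    // Brute-force oracle for the duel tests: evaluates every stored query against the
    // MemoryIndex, without any candidate pre-filtering.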
private static final class ControlQuery extends Query {
private final MemoryIndex memoryIndex;
private final PercolateQuery.QueryStore queryStore;
private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
this.memoryIndex = memoryIndex;
this.queryStore = queryStore;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
return new ConstantScoreWeight(this) {
float _score;
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
int result = scorer.iterator().advance(doc);
if (result == doc) {
return Explanation.match(scorer.score(), "ControlQuery");
}
}
return Explanation.noMatch("ControlQuery");
}
@Override
public String toString() {
return "weight(" + ControlQuery.this + ")";
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
@Override
protected boolean match(int doc) {
try {
Query query = leaf.getQuery(doc);
float score = memoryIndex.search(query);
if (score != 0f) {
if (needsScores) {
_score = score;
}
return true;
} else {
return false;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
@Override
public float score() throws IOException {
return _score;
}
};
}
};
}
@Override
public String toString(String field) {
return "control{" + field + "}";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return classHash();
}
}
}


@@ -19,26 +19,51 @@
package org.elasticsearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.BoostingQueryBuilder;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.RandomScoreFunctionBuilder;
import org.elasticsearch.indices.TermsLookup;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
@@ -49,8 +74,9 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
import static org.elasticsearch.percolator.PercolatorFieldMapper.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.PercolatorFieldMapper.EXTRACTION_FAILED;
import static org.elasticsearch.percolator.PercolatorFieldMapper.EXTRACTION_PARTIAL;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
@@ -61,7 +87,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
private String fieldName;
private IndexService indexService;
private MapperService mapperService;
private PercolatorFieldMapper.PercolatorFieldType fieldType;
private PercolatorFieldMapper.FieldType fieldType;
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
@@ -77,6 +103,10 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("field").field("type", "text").endObject()
.startObject("field1").field("type", "text").endObject()
.startObject("field2").field("type", "text").endObject()
.startObject("_field3").field("type", "text").endObject()
.startObject("field4").field("type", "text").endObject()
.startObject("number_field").field("type", "long").endObject()
.startObject("date_field").field("type", "date").endObject()
.endObject().endObject().endObject().string();
@@ -90,7 +120,101 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.startObject("properties").startObject(fieldName).field("type", "percolator").endObject().endObject()
.endObject().endObject().string();
mapperService.merge(typeName, new CompressedXContent(percolatorMapper), MapperService.MergeReason.MAPPING_UPDATE, true);
fieldType = (PercolatorFieldMapper.PercolatorFieldType) mapperService.fullName(fieldName);
fieldType = (PercolatorFieldMapper.FieldType) mapperService.fullName(fieldName);
}
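    // processQuery should index one entry per extracted term (encoded as field\u0000term)
    // and flag the extraction as complete, i.e. a verified match.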
public void testExtractTerms() throws Exception {
addQueryMapping();
BooleanQuery.Builder bq = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("field", "term1"));
bq.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("field", "term2"));
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
DocumentMapper documentMapper = mapperService.documentMapper(typeName);
PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
mapperService.documentMapperParser(), documentMapper, null, null);
fieldMapper.processQuery(bq.build(), parseContext);
ParseContext.Document document = parseContext.doc();
PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType();
assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_COMPLETE));
List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.queryTermsField.name())));
Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
assertThat(fields.size(), equalTo(2));
assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field\u0000term1"));
assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field\u0000term2"));
}
public void testExtractTermsAndRanges_failed() throws Exception {
addQueryMapping();
TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
DocumentMapper documentMapper = mapperService.documentMapper(typeName);
PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
mapperService.documentMapperParser(), documentMapper, null, null);
fieldMapper.processQuery(query, parseContext);
ParseContext.Document document = parseContext.doc();
PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType();
assertThat(document.getFields().size(), equalTo(1));
assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_FAILED));
}
public void testExtractTermsAndRanges_partial() throws Exception {
addQueryMapping();
PhraseQuery phraseQuery = new PhraseQuery("field", "term");
DocumentMapper documentMapper = mapperService.documentMapper(typeName);
PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
mapperService.documentMapperParser(), documentMapper, null, null);
fieldMapper.processQuery(phraseQuery, parseContext);
ParseContext.Document document = parseContext.doc();
PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType();
assertThat(document.getFields().size(), equalTo(2));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_PARTIAL));
}
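    // The candidate query is a TermsQuery over every term of the document being percolated
    // plus the EXTRACTION_FAILED marker, so queries whose terms could not be extracted are
    // always evaluated: 14 document terms + 1 marker = 15.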
public void testCreateCandidateQuery() throws Exception {
addQueryMapping();
MemoryIndex memoryIndex = new MemoryIndex(false);
memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
TermsQuery termsQuery = (TermsQuery) fieldType.createCandidateQuery(indexReader);
PrefixCodedTerms terms = termsQuery.getTermData();
assertThat(terms.size(), equalTo(15L));
PrefixCodedTerms.TermIterator termIterator = terms.iterator();
assertTermIterator(termIterator, "_field3\u0000me", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "_field3\u0000unhide", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000brown", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000dog", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000fox", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000jumps", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000lazy", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000over", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000quick", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1\u0000the", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field2\u0000more", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field2\u0000some", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field2\u0000text", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field4\u0000123", fieldType.queryTermsField.name());
assertTermIterator(termIterator, EXTRACTION_FAILED, fieldType.extractionResultField.name());
}
private void assertTermIterator(PrefixCodedTerms.TermIterator termIterator, String expectedValue, String expectedField) {
assertThat(termIterator.next().utf8ToString(), equalTo(expectedValue));
assertThat(termIterator.field(), equalTo(expectedField));
}
public void testPercolatorFieldMapper() throws Exception {
@@ -100,12 +224,13 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field(fieldName, queryBuilder)
.endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(),
equalTo(EXTRACTION_COMPLETE));
BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);
        // add a query from which we don't extract terms
@@ -113,11 +238,12 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
.field(fieldName, queryBuilder)
.endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(),
equalTo(EXTRACTION_FAILED));
assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1));
qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);
}
@@ -136,7 +262,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
XContentFactory.jsonBuilder().startObject()
.field(fieldName, query)
.endObject().bytes());
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, query);
}
}
@@ -148,7 +274,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
.field(fieldName, queryBuilder)
.endObject().bytes());
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder.rewrite(indexService.newQueryShardContext()));
}
@@ -169,7 +295,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
addQueryMapping();
ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
.endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(0));
try {
mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
@@ -275,6 +401,53 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
assertThat(e.getCause().getMessage(), equalTo("a document can only contain one percolator query"));
}
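    // Any range that resolves against "now" (with or without date-math rounding) must be
    // rejected when the percolator query is indexed.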
public void testRangeQueryWithNowRangeIsForbidden() throws Exception {
addQueryMapping();
MapperParsingException e = expectThrows(MapperParsingException.class, () -> {
mapperService.documentMapper(typeName).parse("test", typeName, "1",
jsonBuilder().startObject()
.field(fieldName, rangeQuery("date_field").from("2016-01-01||/D").to("now"))
.endObject().bytes());
}
);
assertThat(e.getCause(), instanceOf(IllegalArgumentException.class));
e = expectThrows(MapperParsingException.class, () -> {
mapperService.documentMapper(typeName).parse("test", typeName, "1",
jsonBuilder().startObject()
.field(fieldName, rangeQuery("date_field").from("2016-01-01||/D").to("now/D"))
.endObject().bytes());
}
);
assertThat(e.getCause(), instanceOf(IllegalArgumentException.class));
e = expectThrows(MapperParsingException.class, () -> {
mapperService.documentMapper(typeName).parse("test", typeName, "1",
jsonBuilder().startObject()
.field(fieldName, rangeQuery("date_field").from("now-1d").to("now"))
.endObject().bytes());
}
);
assertThat(e.getCause(), instanceOf(IllegalArgumentException.class));
}
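    // verifyRangeQueries must also detect now-based ranges nested inside bool,
    // constant_score, boosting and function_score queries.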
public void testVerifyRangeQueries() {
RangeQueryBuilder rangeQuery1 = new RangeQueryBuilder("field").from("2016-01-01||/D").to("2017-01-01||/D");
RangeQueryBuilder rangeQuery2 = new RangeQueryBuilder("field").from("2016-01-01||/D").to("now");
PercolatorFieldMapper.verifyRangeQueries(rangeQuery1);
expectThrows(IllegalArgumentException.class, () -> PercolatorFieldMapper.verifyRangeQueries(rangeQuery2));
PercolatorFieldMapper.verifyRangeQueries(new BoolQueryBuilder().must(rangeQuery1));
expectThrows(IllegalArgumentException.class, () ->
PercolatorFieldMapper.verifyRangeQueries(new BoolQueryBuilder().must(rangeQuery2)));
        PercolatorFieldMapper.verifyRangeQueries(new ConstantScoreQueryBuilder(rangeQuery1));
expectThrows(IllegalArgumentException.class, () ->
PercolatorFieldMapper.verifyRangeQueries(new ConstantScoreQueryBuilder(rangeQuery2)));
PercolatorFieldMapper.verifyRangeQueries(new BoostingQueryBuilder(rangeQuery1, new MatchAllQueryBuilder()));
expectThrows(IllegalArgumentException.class, () ->
PercolatorFieldMapper.verifyRangeQueries(new BoostingQueryBuilder(rangeQuery2, new MatchAllQueryBuilder())));
PercolatorFieldMapper.verifyRangeQueries(new FunctionScoreQueryBuilder(rangeQuery1, new RandomScoreFunctionBuilder()));
expectThrows(IllegalArgumentException.class, () ->
PercolatorFieldMapper.verifyRangeQueries(new FunctionScoreQueryBuilder(rangeQuery2, new RandomScoreFunctionBuilder())));
}
private void assertQueryBuilder(BytesRef actual, QueryBuilder expected) throws IOException {
XContentParser sourceParser = PercolatorFieldMapper.QUERY_BUILDER_CONTENT_TYPE.xContent()
.createParser(actual.bytes, actual.offset, actual.length);


@@ -45,10 +45,9 @@ import static org.hamcrest.Matchers.sameInstance;
public class PercolatorHighlightSubFetchPhaseTests extends ESTestCase {
public void testHitsExecutionNeeded() {
PercolateQuery percolateQuery = new PercolateQuery.Builder("", ctx -> null, new BytesArray("{}"),
Mockito.mock(IndexSearcher.class))
.build();
PercolateQuery percolateQuery = new PercolateQuery(
"", ctx -> null, new BytesArray("{}"), new MatchAllDocsQuery(), Mockito.mock(IndexSearcher.class), new MatchAllDocsQuery()
);
PercolatorHighlightSubFetchPhase subFetchPhase = new PercolatorHighlightSubFetchPhase(Settings.EMPTY,
new Highlighters(Settings.EMPTY));
SearchContext searchContext = Mockito.mock(SearchContext.class);
@@ -61,10 +60,9 @@ public class PercolatorHighlightSubFetchPhaseTests extends ESTestCase {
}
public void testLocatePercolatorQuery() {
PercolateQuery percolateQuery = new PercolateQuery.Builder("", ctx -> null, new BytesArray("{}"),
Mockito.mock(IndexSearcher.class))
.build();
PercolateQuery percolateQuery = new PercolateQuery(
"", ctx -> null, new BytesArray("{}"), new MatchAllDocsQuery(), Mockito.mock(IndexSearcher.class), new MatchAllDocsQuery()
);
assertThat(PercolatorHighlightSubFetchPhase.locatePercolatorQuery(new MatchAllDocsQuery()), nullValue());
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER);


@@ -1553,31 +1553,6 @@ public class PercolatorIT extends ESIntegTestCase {
}
}
public void testPercolatorQueryWithNowRange() throws Exception {
client().admin().indices().prepareCreate(INDEX_NAME)
.addMapping("my-type", "timestamp", "type=date,format=epoch_millis")
.addMapping(TYPE_NAME, "query", "type=percolator")
.get();
ensureGreen();
client().prepareIndex(INDEX_NAME, TYPE_NAME, "1")
.setSource(jsonBuilder().startObject().field("query", rangeQuery("timestamp").from("now-1d").to("now")).endObject())
.get();
client().prepareIndex(INDEX_NAME, TYPE_NAME, "2")
.setSource(jsonBuilder().startObject().field("query", constantScoreQuery(rangeQuery("timestamp").from("now-1d").to("now"))).endObject())
.get();
refresh();
logger.info("--> Percolate doc with field1=b");
PercolateResponse response = preparePercolate(client())
.setIndices(INDEX_NAME).setDocumentType("my-type")
.setPercolateDoc(docBuilder().setDoc("timestamp", System.currentTimeMillis()))
.get();
assertMatchCount(response, 2L);
assertThat(response.getMatches(), arrayWithSize(2));
assertThat(convertFromTextArray(response.getMatches(), INDEX_NAME), arrayContainingInAnyOrder("1", "2"));
}
void initNestedIndexAndPercolation() throws IOException {
XContentBuilder mapping = XContentFactory.jsonBuilder();
mapping.startObject().startObject("properties").startObject("companyname").field("type", "text").endObject()


@@ -44,6 +44,7 @@ import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanNearQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanNotQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanTermQuery;
@@ -109,6 +110,102 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
assertThat(response.getHits().getAt(2).getId(), equalTo("3"));
}
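    // Percolates documents against stored range queries on long, double and ip fields;
    // the conjunction queries (3, 6, 9) only match on the shared boundary value.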
public void testPercolatorRangeQueries() throws Exception {
createIndex("test", client().admin().indices().prepareCreate("test")
.addMapping("type", "field1", "type=long", "field2", "type=double", "field3", "type=ip")
.addMapping("queries", "query", "type=percolator")
);
client().prepareIndex("test", "queries", "1")
.setSource(jsonBuilder().startObject().field("query", rangeQuery("field1").from(10).to(12)).endObject())
.get();
client().prepareIndex("test", "queries", "2")
.setSource(jsonBuilder().startObject().field("query", rangeQuery("field1").from(20).to(22)).endObject())
.get();
client().prepareIndex("test", "queries", "3")
.setSource(jsonBuilder().startObject().field("query", boolQuery()
.must(rangeQuery("field1").from(10).to(12))
.must(rangeQuery("field1").from(12).to(14))
).endObject()).get();
client().admin().indices().prepareRefresh().get();
client().prepareIndex("test", "queries", "4")
.setSource(jsonBuilder().startObject().field("query", rangeQuery("field2").from(10).to(12)).endObject())
.get();
client().prepareIndex("test", "queries", "5")
.setSource(jsonBuilder().startObject().field("query", rangeQuery("field2").from(20).to(22)).endObject())
.get();
client().prepareIndex("test", "queries", "6")
.setSource(jsonBuilder().startObject().field("query", boolQuery()
.must(rangeQuery("field2").from(10).to(12))
.must(rangeQuery("field2").from(12).to(14))
).endObject()).get();
client().admin().indices().prepareRefresh().get();
client().prepareIndex("test", "queries", "7")
.setSource(jsonBuilder().startObject()
.field("query", rangeQuery("field3").from("192.168.1.0").to("192.168.1.5"))
.endObject())
.get();
client().prepareIndex("test", "queries", "8")
.setSource(jsonBuilder().startObject()
.field("query", rangeQuery("field3").from("192.168.1.20").to("192.168.1.30"))
.endObject())
.get();
client().prepareIndex("test", "queries", "9")
.setSource(jsonBuilder().startObject().field("query", boolQuery()
.must(rangeQuery("field3").from("192.168.1.0").to("192.168.1.5"))
.must(rangeQuery("field3").from("192.168.1.5").to("192.168.1.10"))
).endObject()).get();
client().admin().indices().prepareRefresh().get();
// Test long range:
BytesReference source = jsonBuilder().startObject().field("field1", 12).endObject().bytes();
SearchResponse response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "type", source))
.get();
assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("3"));
assertThat(response.getHits().getAt(1).getId(), equalTo("1"));
source = jsonBuilder().startObject().field("field1", 11).endObject().bytes();
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "type", source))
.get();
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("1"));
// Test double range:
source = jsonBuilder().startObject().field("field2", 12).endObject().bytes();
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "type", source))
.get();
assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("6"));
assertThat(response.getHits().getAt(1).getId(), equalTo("4"));
source = jsonBuilder().startObject().field("field2", 11).endObject().bytes();
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "type", source))
.get();
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("4"));
// Test IP range:
source = jsonBuilder().startObject().field("field3", "192.168.1.5").endObject().bytes();
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "type", source))
.get();
assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("9"));
assertThat(response.getHits().getAt(1).getId(), equalTo("7"));
source = jsonBuilder().startObject().field("field3", "192.168.1.4").endObject().bytes();
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "type", source))
.get();
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("7"));
}
public void testPercolatorQueryExistingDocument() throws Exception {
createIndex("test", client().admin().indices().prepareCreate("test")
.addMapping("type", "field1", "type=keyword", "field2", "type=keyword")


@@ -18,14 +18,7 @@
*/
package org.elasticsearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.TermsQuery;
@@ -48,11 +41,9 @@ import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.RandomScoreFunction;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.percolator.ExtractQueryTermsService.Result;
import org.elasticsearch.percolator.QueryAnalyzer.Result;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -60,70 +51,18 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL;
import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
import static org.elasticsearch.percolator.QueryAnalyzer.UnsupportedQueryException;
import static org.elasticsearch.percolator.QueryAnalyzer.analyze;
import static org.elasticsearch.percolator.QueryAnalyzer.selectTermListWithTheLongestShortestTerm;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
public class ExtractQueryTermsServiceTests extends ESTestCase {
public static final String QUERY_TERMS_FIELD = "extracted_terms";
public static final String EXTRACTION_RESULT_FIELD = "extraction_result";
public static final FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
static {
QUERY_TERMS_FIELD_TYPE.setTokenized(false);
QUERY_TERMS_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
QUERY_TERMS_FIELD_TYPE.freeze();
}
public void testExtractQueryMetadata() {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("field1", "term1"));
bq.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("field2", "term2"));
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE));
List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD)));
Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
assertThat(fields.size(), equalTo(2));
assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
}
public void testExtractQueryMetadata_unsupported() {
TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getFields().size(), equalTo(1));
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED));
}
public void testExtractQueryMetadata_notVerified() {
PhraseQuery phraseQuery = new PhraseQuery("field", "term");
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getFields().size(), equalTo(2));
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL));
}
public class QueryAnalyzerTests extends ESTestCase {
public void testExtractQueryMetadata_termQuery() {
TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
Result result = extractQueryTerms(termQuery);
Result result = analyze(termQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
@@ -133,7 +72,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_termsQuery() {
TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
Result result = extractQueryTerms(termsQuery);
Result result = analyze(termsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -145,7 +84,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
// test with different fields
termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
result = extractQueryTerms(termsQuery);
result = analyze(termsQuery);
assertThat(result.verified, is(true));
terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -158,7 +97,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_phraseQuery() {
PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
Result result = extractQueryTerms(phraseQuery);
Result result = analyze(phraseQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
@@ -181,7 +120,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
Result result = extractQueryTerms(booleanQuery);
Result result = analyze(booleanQuery);
assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -209,7 +148,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
Result result = extractQueryTerms(booleanQuery);
Result result = analyze(booleanQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -232,7 +171,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(phraseQuery, BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
Result result = extractQueryTerms(booleanQuery);
Result result = analyze(booleanQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
@@ -246,58 +185,58 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
Result result = extractQueryTerms(builder.build());
Result result = analyze(builder.build());
assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2");
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4");
builder.add(phraseQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.MUST_NOT);
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("Minimum match is >= 1, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("Single required clause, so candidate matches are verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
result = extractQueryTerms(builder.build());
result = analyze(builder.build());
assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false));
}
public void testExtractQueryMetadata_constantScoreQuery() {
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
Result result = extractQueryTerms(constantScoreQuery);
Result result = analyze(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
@@ -308,7 +247,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_boostQuery() {
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
BoostQuery boostQuery = new BoostQuery(termQuery1, 1f);
Result result = extractQueryTerms(boostQuery);
Result result = analyze(boostQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
@@ -320,7 +259,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
commonTermsQuery.add(new Term("_field", "_term1"));
commonTermsQuery.add(new Term("_field", "_term2"));
Result result = extractQueryTerms(commonTermsQuery);
Result result = analyze(commonTermsQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -334,7 +273,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_blendedTermQuery() {
Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false);
Result result = extractQueryTerms(blendedTermQuery);
Result result = analyze(blendedTermQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -358,7 +297,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
// 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
Result result = extractQueryTerms(spanTermQuery1);
Result result = analyze(spanTermQuery1);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
@@ -369,7 +308,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
.addClause(spanTermQuery1).addClause(spanTermQuery2).build();
Result result = extractQueryTerms(spanNearQuery);
Result result = analyze(spanNearQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery2.getTerm());
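// A span-near is a conjunction, so indexing the terms of a single clause suffices; the analysis
// keeps the longest term (here "_very_long_term"), presumably because longer terms tend to be
// rarer and therefore select fewer false candidates.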
}
@@ -378,7 +317,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
Result result = extractQueryTerms(spanOrQuery);
Result result = analyze(spanOrQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
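// A span-or is a disjunction: a document matching either clause can match the whole query, so
// the terms of all clauses are extracted and candidate matches cannot be verified up front.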
}
@@ -386,7 +325,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_spanFirstQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
Result result = extractQueryTerms(spanFirstQuery);
Result result = analyze(spanFirstQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
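// The positional constraint of span-first cannot be expressed as extracted terms, so the term
// of the wrapped clause is kept but candidate matches are not verified.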
}
@@ -395,27 +334,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
Result result = extractQueryTerms(spanNotQuery);
Result result = analyze(spanNotQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
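// Only the include clause contributes terms; the exclusion can only be checked by running the
// query, so candidate matches are not verified.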
}
public void testExtractQueryMetadata_matchNoDocsQuery() {
Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"));
assertThat(result.verified, is(true));
assertEquals(0, result.terms.size());
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
result = extractQueryTerms(bq.build());
result = analyze(bq.build());
assertThat(result.verified, is(false));
assertEquals(0, result.terms.size());
bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
result = extractQueryTerms(bq.build());
result = analyze(bq.build());
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
@@ -423,18 +362,18 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
1f
);
result = extractQueryTerms(disjunctionMaxQuery);
result = analyze(disjunctionMaxQuery);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
}
public void testExtractQueryMetadata_matchAllDocsQuery() {
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery()));
expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery()));
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
Result result = extractQueryTerms(builder.build());
Result result = analyze(builder.build());
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, new Term("field", "value"));
@@ -443,39 +382,39 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1));
expectThrows(UnsupportedQueryException.class, () -> analyze(bq1));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
expectThrows(UnsupportedQueryException.class, () -> analyze(bq2));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq3 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq3));
expectThrows(UnsupportedQueryException.class, () -> analyze(bq3));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq4 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4));
expectThrows(UnsupportedQueryException.class, () -> analyze(bq4));
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq5 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5));
expectThrows(UnsupportedQueryException.class, () -> analyze(bq5));
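// A match-all query, or any disjunction containing one, cannot be narrowed down to a set of
// terms, so analysis gives up with UnsupportedQueryException; in a conjunction the remaining
// required clauses can still supply the terms.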
}
public void testExtractQueryMetadata_unsupportedQuery() {
TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false);
UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(termRangeQuery));
UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(termRangeQuery));
assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery));
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
@@ -484,7 +423,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(termRangeQuery, BooleanClause.Occur.SHOULD);
BooleanQuery bq = builder.build();
e = expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq));
e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq));
assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery));
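// A range over terms cannot be represented by a finite set of extracted terms, so it is treated
// as unsupported and surfaced via UnsupportedQueryException, even when nested in a boolean query.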
}
@@ -497,7 +436,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
Result result = extractQueryTerms(bq1);
Result result = analyze(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery1.getTerm());
@@ -507,7 +446,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(termQuery2, BooleanClause.Occur.MUST);
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
bq1 = builder.build();
result = extractQueryTerms(bq1);
result = analyze(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery2.getTerm());
@@ -515,7 +454,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build();
UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq2));
assertThat(e.getUnsupportedQuery(), sameInstance(unsupportedQuery));
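// An unsupported clause may be dropped from a conjunction as long as another required clause
// still yields terms, at the cost of verification; once no supported required clause remains,
// the UnsupportedQueryException propagates and extraction fails for the whole query.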
}
@@ -528,7 +467,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
);
Result result = extractQueryTerms(disjunctionMaxQuery);
Result result = analyze(disjunctionMaxQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -546,7 +485,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
);
result = extractQueryTerms(disjunctionMaxQuery);
result = analyze(disjunctionMaxQuery);
assertThat(result.verified, is(false));
terms = new ArrayList<>(result.terms);
Collections.sort(terms);
@@ -563,12 +502,12 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testSynonymQuery() {
SynonymQuery query = new SynonymQuery();
Result result = extractQueryTerms(query);
Result result = analyze(query);
assertThat(result.verified, is(true));
assertThat(result.terms.isEmpty(), is(true));
query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2"));
result = extractQueryTerms(query);
result = analyze(query);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("_field", "_value1"), new Term("_field", "_value2"));
}
@@ -576,47 +515,16 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testFunctionScoreQuery() {
TermQuery termQuery = new TermQuery(new Term("_field", "_value"));
FunctionScoreQuery functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction());
Result result = extractQueryTerms(functionScoreQuery);
Result result = analyze(functionScoreQuery);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("_field", "_value"));
functionScoreQuery = new FunctionScoreQuery(termQuery, new RandomScoreFunction(), 1f, null, 10f);
result = extractQueryTerms(functionScoreQuery);
result = analyze(functionScoreQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, new Term("_field", "_value"));
}
public void testCreateQueryMetadataQuery() throws Exception {
MemoryIndex memoryIndex = new MemoryIndex(false);
memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
TermsQuery query = (TermsQuery)
createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
PrefixCodedTerms terms = query.getTermData();
assertThat(terms.size(), equalTo(15L));
PrefixCodedTerms.TermIterator termIterator = terms.iterator();
assertTermIterator(termIterator, "_field3\u0000me", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "_field3\u0000unhide", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000brown", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000dog", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000fox", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000jumps", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000lazy", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000over", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000quick", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field1\u0000the", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field2\u0000more", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
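// The candidate-selection query built here matches percolator queries whose extracted terms
// occur in the document, encoding each document term as "<field>\u0000<term>" in the query
// terms field, e.g. new Term(QUERY_TERMS_FIELD, "field1" + '\u0000' + "brown"), plus the
// extraction-failed marker term so that queries whose terms could not be extracted are always
// returned as candidates.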
}
public void testSelectTermsListWithHighestSumOfTermLength() {
Set<Term> terms1 = new HashSet<>();
int shortestTerms1Length = Integer.MAX_VALUE;
@@ -643,11 +551,6 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
assertThat(result, sameInstance(expected));
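// Of two candidate term sets the one with the greater total term length wins, on the assumption
// that longer terms are rarer and therefore produce fewer candidates to verify.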
}
private void assertTermIterator(PrefixCodedTerms.TermIterator termIterator, String expectedValue, String expectedField) {
assertThat(termIterator.next().utf8ToString(), equalTo(expectedValue));
assertThat(termIterator.field(), equalTo(expectedField));
}
private static void assertTermsEqual(Set<Term> actual, Term... expected) {
assertEquals(new HashSet<>(Arrays.asList(expected)), actual);
}