percolator: Don't verify candidate matches with MemoryIndex that are verified matches

If we don't care about scoring, then for certain queries we can be certain that any candidate match is always
an actual match. Verifying these queries with the MemoryIndex can therefore be skipped.
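
A minimal sketch of the idea (hypothetical names; the change below builds the equivalent of verifiedDocs from a query on the extraction result field):

import java.util.BitSet;

// Sketch only: at index time we record per percolator query whether a candidate
// match is guaranteed to be a real match ("verified"). At search time a verified
// candidate is accepted without running it against the MemoryIndex.
class VerifiedCandidateSketch {
    private final BitSet verifiedDocs;           // queries whose terms were fully extracted
    private final CandidateVerifier memoryIndex; // hypothetical stand-in for the expensive check

    VerifiedCandidateSketch(BitSet verifiedDocs, CandidateVerifier memoryIndex) {
        this.verifiedDocs = verifiedDocs;
        this.memoryIndex = memoryIndex;
    }

    boolean matches(int candidateDocId) {
        if (verifiedDocs.get(candidateDocId)) {
            return true; // verified candidate: skip the MemoryIndex entirely
        }
        return memoryIndex.verify(candidateDocId);
    }

    interface CandidateVerifier {
        boolean verify(int docId);
    }
}
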
Martijn van Groningen 2016-06-02 12:53:05 +02:00
parent 6d5b4a78fe
commit 599a548998
9 changed files with 799 additions and 247 deletions


@ -19,6 +19,7 @@
package org.elasticsearch.common.lucene;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
@ -27,6 +28,8 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoDeletionPolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
@ -35,9 +38,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
@ -49,9 +54,6 @@ import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
/**
*
*/
public class LuceneTests extends ESTestCase {
public void testWaitForIndex() throws Exception {
final MockDirectoryWrapper dir = newMockDirectory();
@ -355,6 +357,45 @@ public class LuceneTests extends ESTestCase {
dir.close();
}
public void testAsSequentialAccessBits() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
Document doc = new Document();
doc.add(new StringField("foo", "bar", Store.NO));
w.addDocument(doc);
doc = new Document();
w.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "bar", Store.NO));
w.addDocument(doc);
try (DirectoryReader reader = DirectoryReader.open(w)) {
IndexSearcher searcher = newSearcher(reader);
Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false);
assertEquals(1, reader.leaves().size());
LeafReaderContext leafReaderContext = reader.leaves().get(0);
Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext));
expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1));
expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc()));
assertTrue(bits.get(0));
assertTrue(bits.get(0));
assertFalse(bits.get(1));
assertFalse(bits.get(1));
expectThrows(IllegalArgumentException.class, () -> bits.get(0));
assertTrue(bits.get(2));
assertTrue(bits.get(2));
expectThrows(IllegalArgumentException.class, () -> bits.get(1));
}
w.close();
dir.close();
}
/**
* Test that the "unmap hack" is detected as supported by lucene.
* This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038


@ -53,10 +53,13 @@ import org.elasticsearch.index.mapper.ParseContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
/**
* Utility to extract query terms from queries and create queries from documents.
@ -64,64 +67,179 @@ import java.util.Set;
public final class ExtractQueryTermsService {
private static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
public static final String EXTRACTION_COMPLETE = "complete";
public static final String EXTRACTION_PARTIAL = "partial";
public static final String EXTRACTION_FAILED = "failed";
static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
static {
Map<Class<? extends Query>, Function<Query, Result>> map = new HashMap<>(16);
map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
map.put(ConstantScoreQuery.class, constantScoreQuery());
map.put(BoostQuery.class, boostQuery());
map.put(TermQuery.class, termQuery());
map.put(TermsQuery.class, termsQuery());
map.put(CommonTermsQuery.class, commonTermsQuery());
map.put(BlendedTermQuery.class, blendedTermQuery());
map.put(PhraseQuery.class, phraseQuery());
map.put(SpanTermQuery.class, spanTermQuery());
map.put(SpanNearQuery.class, spanNearQuery());
map.put(SpanOrQuery.class, spanOrQuery());
map.put(SpanFirstQuery.class, spanFirstQuery());
map.put(SpanNotQuery.class, spanNotQuery());
map.put(BooleanQuery.class, booleanQuery());
map.put(DisjunctionMaxQuery.class, disjunctionMaxQuery());
queryProcessors = Collections.unmodifiableMap(map);
}
private ExtractQueryTermsService() {
}
/**
* Extracts all terms from the specified query and adds them to the specified document.
*
* @param query The query to extract terms from
* @param document The document to add the extracted terms to
* @param queryTermsFieldField The field in the document holding the extracted terms
* @param unknownQueryField The field used to mark a document that not all query terms could be extracted.
* For example the query contained an unsupported query (e.g. WildcardQuery).
* @param fieldType The field type for the query metadata field
* @param extractionResultField The field that records whether query term extraction was complete, partial or
* failed. (For example, if the query contained an unsupported query such as
* WildcardQuery, then query extraction would fail.)
* @param fieldType The field type for the query metadata field
*/
public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField,
String unknownQueryField, FieldType fieldType) {
Set<Term> queryTerms;
String extractionResultField, FieldType fieldType) {
Result result;
try {
queryTerms = extractQueryTerms(query);
result = extractQueryTerms(query);
} catch (UnsupportedQueryException e) {
document.add(new Field(unknownQueryField, new BytesRef(), fieldType));
document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType));
return;
}
for (Term term : queryTerms) {
for (Term term : result.terms) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes());
document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType));
}
if (result.verified) {
document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType));
} else {
document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType));
}
}
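
A usage sketch of the method above, as it might appear inside a test method (field names are the constants used by the tests further down, not production names):

// Sketch: a plain term query is fully extractable, so it is marked "complete"
// and candidate matches for it may later skip MemoryIndex verification.
FieldType fieldType = new FieldType();
fieldType.setTokenized(false);
fieldType.setIndexOptions(IndexOptions.DOCS);
ParseContext.Document document = new ParseContext.Document();
Query query = new TermQuery(new Term("field1", "term1"));
ExtractQueryTermsService.extractQueryTerms(query, document,
        "extracted_terms", "extraction_result", fieldType);
// document now holds:
//   extracted_terms   -> "field1\u0000term1" (field name, NUL separator, term bytes)
//   extraction_result -> "complete"
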
/**
* Creates a terms query containing all terms from all fields of the specified index reader.
*/
public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
Term... optionalTerms) throws IOException {
Objects.requireNonNull(queryMetadataField);
List<Term> extractedTerms = new ArrayList<>();
Collections.addAll(extractedTerms, optionalTerms);
Fields fields = MultiFields.getFields(indexReader);
for (String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
BytesRef fieldBr = new BytesRef(field);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
}
}
return new TermsQuery(extractedTerms);
}
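
The field/term encoding used above, shown standalone (real Lucene BytesRef utilities; literal values are illustrative):

// Encode "field NUL term", matching FIELD_VALUE_SEPARATOR above.
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef("field1"));
builder.append((byte) 0); // FIELD_VALUE_SEPARATOR
builder.append(new BytesRef("term1"));
BytesRef encoded = builder.toBytesRef(); // bytes of "field1\u0000term1"
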
/**
* Extracts all query terms from the provided query and adds them to the specified list.
*
* <p>
* From a boolean query with no should clauses, and from phrase queries, only the longest terms are selected,
* since those are likely to be the rarest. A boolean query's must_not clauses are always ignored.
*
* <p>
* If no query terms can be extracted from a part of the query, then term extraction is stopped and
* an UnsupportedQueryException is thrown.
*/
static Set<Term> extractQueryTerms(Query query) {
if (query instanceof MatchNoDocsQuery) {
// no terms to extract as this query matches no docs
return Collections.emptySet();
} else if (query instanceof TermQuery) {
return Collections.singleton(((TermQuery) query).getTerm());
} else if (query instanceof TermsQuery) {
Set<Term> terms = new HashSet<>();
static Result extractQueryTerms(Query query) {
Class queryClass = query.getClass();
if (queryClass.isAnonymousClass()) {
// Sometimes queries have anonymous classes; in that case we need the direct super class.
// (for example blended term query)
queryClass = queryClass.getSuperclass();
}
Function<Query, Result> queryProcessor = queryProcessors.get(queryClass);
if (queryProcessor != null) {
return queryProcessor.apply(query);
} else {
throw new UnsupportedQueryException(query);
}
}
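
Why the superclass lookup matters, as a small sketch (BlendedTermQuery factory methods return anonymous subclasses, which the tests below rely on as well):

// query.getClass() for a blended term query is an anonymous subclass,
// so the processor map must be keyed on the direct superclass instead.
Query blended = BlendedTermQuery.booleanBlendedQuery(
        new Term[]{new Term("_field", "_term")}, false);
Class<?> clazz = blended.getClass();
if (clazz.isAnonymousClass()) {
    clazz = clazz.getSuperclass(); // BlendedTermQuery.class
}
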
static Function<Query, Result> matchNoDocsQuery() {
return (query -> new Result(true, Collections.emptySet()));
}
static Function<Query, Result> constantScoreQuery() {
return query -> {
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
return extractQueryTerms(wrappedQuery);
};
}
static Function<Query, Result> boostQuery() {
return query -> {
Query wrappedQuery = ((BoostQuery) query).getQuery();
return extractQueryTerms(wrappedQuery);
};
}
static Function<Query, Result> termQuery() {
return (query -> {
TermQuery termQuery = (TermQuery) query;
return new Result(true, Collections.singleton(termQuery.getTerm()));
});
}
static Function<Query, Result> termsQuery() {
return query -> {
TermsQuery termsQuery = (TermsQuery) query;
Set<Term> terms = new HashSet<>();
PrefixCodedTerms.TermIterator iterator = termsQuery.getTermData().iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
terms.add(new Term(iterator.field(), term));
}
return terms;
} else if (query instanceof PhraseQuery) {
return new Result(true, terms);
};
}
static Function<Query, Result> commonTermsQuery() {
return query -> {
List<Term> terms = ((CommonTermsQuery) query).getTerms();
return new Result(false, new HashSet<>(terms));
};
}
static Function<Query, Result> blendedTermQuery() {
return query -> {
List<Term> terms = ((BlendedTermQuery) query).getTerms();
return new Result(true, new HashSet<>(terms));
};
}
static Function<Query, Result> phraseQuery() {
return query -> {
Term[] terms = ((PhraseQuery) query).getTerms();
if (terms.length == 0) {
return Collections.emptySet();
return new Result(true, Collections.emptySet());
}
// the longest term is likely to be the rarest,
@ -132,19 +250,76 @@ public final class ExtractQueryTermsService {
longestTerm = term;
}
}
return Collections.singleton(longestTerm);
} else if (query instanceof BooleanQuery) {
List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
boolean hasRequiredClauses = false;
return new Result(false, Collections.singleton(longestTerm));
};
}
static Function<Query, Result> spanTermQuery() {
return query -> {
Term term = ((SpanTermQuery) query).getTerm();
return new Result(true, Collections.singleton(term));
};
}
static Function<Query, Result> spanNearQuery() {
return query -> {
Set<Term> bestClauses = null;
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
for (SpanQuery clause : spanNearQuery.getClauses()) {
Result temp = extractQueryTerms(clause);
bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
}
return new Result(false, bestClauses);
};
}
static Function<Query, Result> spanOrQuery() {
return query -> {
Set<Term> terms = new HashSet<>();
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
for (SpanQuery clause : spanOrQuery.getClauses()) {
terms.addAll(extractQueryTerms(clause).terms);
}
return new Result(false, terms);
};
}
static Function<Query, Result> spanNotQuery() {
return query -> {
Result result = extractQueryTerms(((SpanNotQuery) query).getInclude());
return new Result(false, result.terms);
};
}
static Function<Query, Result> spanFirstQuery() {
return query -> {
Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch());
return new Result(false, result.terms);
};
}
static Function<Query, Result> booleanQuery() {
return query -> {
BooleanQuery bq = (BooleanQuery) query;
List<BooleanClause> clauses = bq.clauses();
int minimumShouldMatch = bq.getMinimumNumberShouldMatch();
int numRequiredClauses = 0;
int numOptionalClauses = 0;
int numProhibitedClauses = 0;
for (BooleanClause clause : clauses) {
if (clause.isRequired()) {
hasRequiredClauses = true;
break;
numRequiredClauses++;
}
if (clause.isProhibited()) {
numProhibitedClauses++;
}
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
numOptionalClauses++;
}
}
if (hasRequiredClauses) {
UnsupportedQueryException uqe = null;
if (numRequiredClauses > 0) {
Set<Term> bestClause = null;
UnsupportedQueryException uqe = null;
for (BooleanClause clause : clauses) {
if (clause.isRequired() == false) {
// skip must_not clauses, we don't need to remember the things that do *not* match...
@ -153,77 +328,56 @@ public final class ExtractQueryTermsService {
continue;
}
Set<Term> temp;
Result temp;
try {
temp = extractQueryTerms(clause.getQuery());
} catch (UnsupportedQueryException e) {
uqe = e;
continue;
}
bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause);
}
if (bestClause != null) {
return bestClause;
return new Result(false, bestClause);
} else {
if (uqe != null) {
// we're unable to select the best clause and an exception occurred, so we bail
throw uqe;
} else {
// We didn't find a clause and no exception occurred, so this bq only contained MatchNoDocsQueries,
return new Result(true, Collections.emptySet());
}
return Collections.emptySet();
}
} else {
Set<Term> terms = new HashSet<>();
List<Query> disjunctions = new ArrayList<>(numOptionalClauses);
for (BooleanClause clause : clauses) {
if (clause.isProhibited()) {
// we don't need to remember the things that do *not* match...
continue;
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
disjunctions.add(clause.getQuery());
}
terms.addAll(extractQueryTerms(clause.getQuery()));
}
return terms;
return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0);
}
} else if (query instanceof ConstantScoreQuery) {
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
return extractQueryTerms(wrappedQuery);
} else if (query instanceof BoostQuery) {
Query wrappedQuery = ((BoostQuery) query).getQuery();
return extractQueryTerms(wrappedQuery);
} else if (query instanceof CommonTermsQuery) {
List<Term> terms = ((CommonTermsQuery) query).getTerms();
return new HashSet<>(terms);
} else if (query instanceof BlendedTermQuery) {
List<Term> terms = ((BlendedTermQuery) query).getTerms();
return new HashSet<>(terms);
} else if (query instanceof DisjunctionMaxQuery) {
};
}
static Function<Query, Result> disjunctionMaxQuery() {
return query -> {
List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
Set<Term> terms = new HashSet<>();
for (Query disjunct : disjuncts) {
terms.addAll(extractQueryTerms(disjunct));
return handleDisjunction(disjuncts, 1, false);
};
}
static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
Set<Term> terms = new HashSet<>();
for (Query disjunct : disjunctions) {
Result subResult = extractQueryTerms(disjunct);
if (subResult.verified == false) {
verified = false;
}
return terms;
} else if (query instanceof SpanTermQuery) {
return Collections.singleton(((SpanTermQuery) query).getTerm());
} else if (query instanceof SpanNearQuery) {
Set<Term> bestClause = null;
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
for (SpanQuery clause : spanNearQuery.getClauses()) {
Set<Term> temp = extractQueryTerms(clause);
bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
}
return bestClause;
} else if (query instanceof SpanOrQuery) {
Set<Term> terms = new HashSet<>();
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
for (SpanQuery clause : spanOrQuery.getClauses()) {
terms.addAll(extractQueryTerms(clause));
}
return terms;
} else if (query instanceof SpanFirstQuery) {
return extractQueryTerms(((SpanFirstQuery)query).getMatch());
} else if (query instanceof SpanNotQuery) {
return extractQueryTerms(((SpanNotQuery) query).getInclude());
} else {
throw new UnsupportedQueryException(query);
terms.addAll(subResult.terms);
}
return new Result(verified, terms);
}
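
The verified rule applied by handleDisjunction, restated as a standalone sketch (a hypothetical helper, not part of this change):

// A disjunction is only "verified" when a single matching clause suffices
// (minimumShouldMatch <= 1), nothing is prohibited, and every sub-result is
// itself verified.
static boolean disjunctionVerified(boolean[] subResultsVerified,
                                   int minimumShouldMatch,
                                   boolean hasOtherClauses) {
    if (minimumShouldMatch > 1 || hasOtherClauses) {
        return false;
    }
    for (boolean verified : subResultsVerified) {
        if (verified == false) {
            return false;
        }
    }
    return true;
}
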
static Set<Term> selectTermListWithTheLongestShortestTerm(Set<Term> terms1, Set<Term> terms2) {
@ -243,7 +397,7 @@ public final class ExtractQueryTermsService {
}
}
private static int minTermLength(Set<Term> terms) {
static int minTermLength(Set<Term> terms) {
int min = Integer.MAX_VALUE;
for (Term term : terms) {
min = Math.min(min, term.bytes().length);
@ -251,40 +405,22 @@ public final class ExtractQueryTermsService {
return min;
}
/**
* Creates a boolean query with a should clause for each term on all fields of the specified index reader.
*/
public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
String unknownQueryField) throws IOException {
Objects.requireNonNull(queryMetadataField);
Objects.requireNonNull(unknownQueryField);
static class Result {
List<Term> extractedTerms = new ArrayList<>();
extractedTerms.add(new Term(unknownQueryField));
Fields fields = MultiFields.getFields(indexReader);
for (String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
final Set<Term> terms;
final boolean verified;
BytesRef fieldBr = new BytesRef(field);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null ; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
}
Result(boolean verified, Set<Term> terms) {
this.terms = terms;
this.verified = verified;
}
return new TermsQuery(extractedTerms);
}
/**
* Exception indicating that some or all query terms couldn't be extracted from a percolator query.
*/
public static class UnsupportedQueryException extends RuntimeException {
static class UnsupportedQueryException extends RuntimeException {
private final Query unsupportedQuery;


@ -28,12 +28,15 @@ import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import java.io.IOException;
import java.util.Objects;
@ -54,6 +57,7 @@ public final class PercolateQuery extends Query implements Accountable {
private final IndexSearcher percolatorIndexSearcher;
private Query queriesMetaDataQuery;
private Query verifiedQueriesQuery = new MatchNoDocsQuery("");
private Query percolateTypeQuery;
/**
@ -64,21 +68,32 @@ public final class PercolateQuery extends Query implements Accountable {
*/
public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) {
this.docType = Objects.requireNonNull(docType);
this.queryStore = Objects.requireNonNull(queryStore);
this.documentSource = Objects.requireNonNull(documentSource);
this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
this.queryStore = Objects.requireNonNull(queryStore);
}
/**
* Optionally sets a query that reduces the number of queries to percolate based on extracted terms from
* the document to be percolated.
*
* @param extractedTermsFieldName The name of the field to get the extracted terms from
* @param unknownQueryFieldname The field used to mark documents whose queries couldn't all get extracted
* @param extractedTermsFieldName The name of the field to get the extracted terms from
* @param extractionResultField The field to indicate for a document whether query term extraction was complete,
* partial or failed. If query extraction was complete, the MemoryIndex doesn't
* need to verify the candidate matches.
*/
public void extractQueryTermsQuery(String extractedTermsFieldName, String unknownQueryFieldname) throws IOException {
public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException {
// We can only skip the MemoryIndex verification when percolating a single document.
// When the document being percolated contains a nested object field, the MemoryIndex contains multiple
// documents. In that case the term query that indicates whether MemoryIndex verification can be skipped
// can incorrectly indicate that non-nested queries would match, while their nested variants would not.
if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) {
this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE));
}
this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery(
percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName, unknownQueryFieldname
percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName,
// include extractionResultField:failed, because docs with this term have no extractedTermsField
// and otherwise we would fail to return these docs. Docs that failed query term extraction
// always need to be verified by MemoryIndex:
new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED)
);
}
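
The single-document guard above, restated inline (a hypothetical condensed form; the field name and marker follow ExtractQueryTermsService):

// With nested objects the MemoryIndex holds several documents, and a "complete"
// marker could wrongly verify a candidate whose nested variant does not match,
// so the shortcut is only enabled for a single-document MemoryIndex.
Query verifiedQueriesQuery = percolatorIndexSearcher.getIndexReader().maxDoc() == 1
        ? new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE))
        : new MatchNoDocsQuery("percolating more than one (nested) document");
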
@ -94,14 +109,15 @@ public final class PercolateQuery extends Query implements Accountable {
throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata");
}
// The query that selects which percolator queries will be evaluated by MemoryIndex:
BooleanQuery.Builder builder = new BooleanQuery.Builder();
BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder();
if (percolateTypeQuery != null) {
builder.add(percolateTypeQuery, FILTER);
queriesQuery.add(percolateTypeQuery, FILTER);
}
if (queriesMetaDataQuery != null) {
builder.add(queriesMetaDataQuery, FILTER);
queriesQuery.add(queriesMetaDataQuery, FILTER);
}
return new PercolateQuery(docType, queryStore, documentSource, builder.build(), percolatorIndexSearcher);
return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher,
verifiedQueriesQuery);
}
}
@ -110,22 +126,25 @@ public final class PercolateQuery extends Query implements Accountable {
private final QueryStore queryStore;
private final BytesReference documentSource;
private final Query percolatorQueriesQuery;
private final Query verifiedQueriesQuery;
private final IndexSearcher percolatorIndexSearcher;
private PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher) {
Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) {
this.documentType = documentType;
this.documentSource = documentSource;
this.percolatorQueriesQuery = percolatorQueriesQuery;
this.queryStore = queryStore;
this.percolatorIndexSearcher = percolatorIndexSearcher;
this.verifiedQueriesQuery = verifiedQueriesQuery;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = percolatorQueriesQuery.rewrite(reader);
if (rewritten != percolatorQueriesQuery) {
return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher);
return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher,
verifiedQueriesQuery);
} else {
return this;
}
@ -133,6 +152,7 @@ public final class PercolateQuery extends Query implements Accountable {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false);
final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores);
return new Weight(this) {
@Override
@ -206,6 +226,8 @@ public final class PercolateQuery extends Query implements Accountable {
}
};
} else {
Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext);
Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {
@Override
@ -214,6 +236,14 @@ public final class PercolateQuery extends Query implements Accountable {
}
boolean matchDocId(int docId) throws IOException {
// We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
// If docId also appears in the verifiedDocsBits, then that means that during indexing
// we were able to extract all query terms for this candidate match,
// and we determined, based on the nature of the query, that it is safe to skip
// the MemoryIndex verification.
if (verifiedDocsBits.get(docId)) {
return true;
}
Query query = percolatorQueries.getQuery(docId);
return query != null && Lucene.exists(percolatorIndexSearcher, query);
}


@ -420,10 +420,11 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
"] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
}
PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
PercolateQuery.QueryStore queryStore = createStore(pft, context, mapUnmappedFieldsAsString);
PercolateQuery.Builder builder = new PercolateQuery.Builder(
documentType, createStore(pft, context, mapUnmappedFieldsAsString), document, docSearcher
documentType, queryStore, document, docSearcher
);
builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getExtractionResultFieldName());
return builder.build();
} else {
Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, MapperService.PERCOLATOR_LEGACY_TYPE_NAME));


@ -58,7 +58,7 @@ public class PercolatorFieldMapper extends FieldMapper {
private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType();
public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
public static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
@ -75,15 +75,15 @@ public class PercolatorFieldMapper extends FieldMapper {
context.path().add(name());
KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType();
KeywordFieldMapper unknownQueryField = createExtractQueryFieldBuilder(UNKNOWN_QUERY_FIELD_NAME, context);
((PercolatorFieldType) fieldType).unknownQueryField = unknownQueryField.fieldType();
KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType();
BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType();
context.path().remove();
setupFieldType(context);
return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
unknownQueryField, queryBuilderField);
extractionResultField, queryBuilderField);
}
static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
@ -102,6 +102,7 @@ public class PercolatorFieldMapper extends FieldMapper {
builder.fieldType().setDocValuesType(DocValuesType.BINARY);
return builder.build(context);
}
}
public static class TypeParser implements FieldMapper.TypeParser {
@ -115,7 +116,7 @@ public class PercolatorFieldMapper extends FieldMapper {
public static class PercolatorFieldType extends MappedFieldType {
private MappedFieldType queryTermsField;
private MappedFieldType unknownQueryField;
private MappedFieldType extractionResultField;
private MappedFieldType queryBuilderField;
public PercolatorFieldType() {
@ -127,7 +128,7 @@ public class PercolatorFieldMapper extends FieldMapper {
public PercolatorFieldType(PercolatorFieldType ref) {
super(ref);
queryTermsField = ref.queryTermsField;
unknownQueryField = ref.unknownQueryField;
extractionResultField = ref.extractionResultField;
queryBuilderField = ref.queryBuilderField;
}
@ -135,8 +136,8 @@ public class PercolatorFieldMapper extends FieldMapper {
return queryTermsField.name();
}
public String getUnknownQueryFieldName() {
return unknownQueryField.name();
public String getExtractionResultFieldName() {
return extractionResultField.name();
}
public String getQueryBuilderFieldName() {
@ -162,17 +163,17 @@ public class PercolatorFieldMapper extends FieldMapper {
private final boolean mapUnmappedFieldAsString;
private final QueryShardContext queryShardContext;
private KeywordFieldMapper queryTermsField;
private KeywordFieldMapper unknownQueryField;
private KeywordFieldMapper extractionResultField;
private BinaryFieldMapper queryBuilderField;
public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo, QueryShardContext queryShardContext,
KeywordFieldMapper queryTermsField, KeywordFieldMapper unknownQueryField,
KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
BinaryFieldMapper queryBuilderField) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
this.queryShardContext = queryShardContext;
this.queryTermsField = queryTermsField;
this.unknownQueryField = unknownQueryField;
this.extractionResultField = extractionResultField;
this.queryBuilderField = queryBuilderField;
this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
}
@ -181,18 +182,18 @@ public class PercolatorFieldMapper extends FieldMapper {
public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
PercolatorFieldMapper updated = (PercolatorFieldMapper) super.updateFieldType(fullNameToFieldType);
KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
KeywordFieldMapper unknownQueryUpdated = (KeywordFieldMapper) unknownQueryField.updateFieldType(fullNameToFieldType);
KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
if (updated == this || queryTermsUpdated == queryTermsField || unknownQueryUpdated == unknownQueryField
|| queryBuilderUpdated == queryBuilderField) {
if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
&& queryBuilderUpdated == queryBuilderField) {
return this;
}
if (updated == this) {
updated = (PercolatorFieldMapper) updated.clone();
}
updated.queryTermsField = queryTermsUpdated;
updated.unknownQueryField = unknownQueryUpdated;
updated.extractionResultField = extractionResultUpdated;
updated.queryBuilderField = queryBuilderUpdated;
return updated;
}
@ -220,7 +221,7 @@ public class PercolatorFieldMapper extends FieldMapper {
}
Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder);
ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), unknownQueryField.name(),
ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(),
queryTermsField.fieldType());
return null;
}
@ -258,7 +259,7 @@ public class PercolatorFieldMapper extends FieldMapper {
@Override
public Iterator<Mapper> iterator() {
return Arrays.<Mapper>asList(queryTermsField, unknownQueryField, queryBuilderField).iterator();
return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField).iterator();
}
@Override


@ -22,6 +22,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
@ -44,6 +46,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.percolator.ExtractQueryTermsService.Result;
import org.elasticsearch.test.ESTestCase;
@ -54,18 +57,22 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL;
import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
public class ExtractQueryTermsServiceTests extends ESTestCase {
public final static String QUERY_TERMS_FIELD = "extracted_terms";
public final static String UNKNOWN_QUERY_FIELD = "unknown_query";
public static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
public final static String EXTRACTION_RESULT_FIELD = "extraction_result";
public final static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
static {
QUERY_TERMS_FIELD_TYPE.setTokenized(false);
@ -81,33 +88,41 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
Collections.sort(document.getFields(), (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
assertThat(document.getFields().size(), equalTo(2));
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
assertThat(document.getFields().get(1).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(document.getFields().get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE));
List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD)));
Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
assertThat(fields.size(), equalTo(2));
assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
}
public void testExtractQueryMetadata_unsupported() {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("field1", "term1"));
bq.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("field2", "term2"));
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(query, document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getFields().size(), equalTo(1));
assertThat(document.getFields().get(0).name(), equalTo(UNKNOWN_QUERY_FIELD));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo(""));
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED));
}
public void testExtractQueryMetadata_notVerified() {
PhraseQuery phraseQuery = new PhraseQuery("field", "term");
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getFields().size(), equalTo(2));
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL));
}
public void testExtractQueryMetadata_termQuery() {
TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
List<Term> terms = new ArrayList<>(extractQueryTerms(termQuery));
Result result = extractQueryTerms(termQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
@ -115,7 +130,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_termsQuery() {
TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
List<Term> terms = new ArrayList<>(extractQueryTerms(termsQuery));
Result result = extractQueryTerms(termsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
@ -125,7 +142,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
// test with different fields
termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
terms = new ArrayList<>(extractQueryTerms(termsQuery));
result = extractQueryTerms(termsQuery);
assertThat(result.verified, is(true));
terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field1"));
@ -136,7 +155,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_phraseQuery() {
PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
List<Term> terms = new ArrayList<>(extractQueryTerms(phraseQuery));
Result result = extractQueryTerms(phraseQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
@ -157,7 +178,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
Result result = extractQueryTerms(booleanQuery);
assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(3));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@ -183,7 +206,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
Result result = extractQueryTerms(booleanQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@ -204,16 +229,74 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(phraseQuery, BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
Result result = extractQueryTerms(booleanQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
}
public void testExactMatch_booleanQuery() {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1"));
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
Result result = extractQueryTerms(builder.build());
assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2");
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4");
builder.add(phraseQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.MUST_NOT);
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("Minimum match is >= 1, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
result = extractQueryTerms(builder.build());
assertThat("Single required clause, so candidate matches are verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
result = extractQueryTerms(builder.build());
assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false));
builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
result = extractQueryTerms(builder.build());
assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false));
}
public void testExtractQueryMetadata_constantScoreQuery() {
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
Result result = extractQueryTerms(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@ -222,7 +305,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_boostQuery() {
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f);
List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
Result result = extractQueryTerms(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@ -232,7 +317,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
commonTermsQuery.add(new Term("_field", "_term1"));
commonTermsQuery.add(new Term("_field", "_term2"));
List<Term> terms = new ArrayList<>(extractQueryTerms(commonTermsQuery));
Result result = extractQueryTerms(commonTermsQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
@ -242,15 +329,17 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
}
public void testExtractQueryMetadata_blendedTermQuery() {
Term[] terms = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(terms, false);
List<Term> result = new ArrayList<>(extractQueryTerms(commonTermsQuery));
Collections.sort(result);
assertThat(result.size(), equalTo(2));
assertThat(result.get(0).field(), equalTo("_field"));
assertThat(result.get(0).text(), equalTo("_term1"));
assertThat(result.get(1).field(), equalTo("_field"));
assertThat(result.get(1).text(), equalTo("_term2"));
Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false);
Result result = extractQueryTerms(commonTermsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
assertThat(terms.get(0).text(), equalTo("_term1"));
assertThat(terms.get(1).field(), equalTo("_field"));
assertThat(terms.get(1).text(), equalTo("_term2"));
}
public void testExtractQueryMetadata_spanTermQuery() {
@ -266,8 +355,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
// 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
Set<Term> terms = extractQueryTerms(spanTermQuery1);
assertTermsEqual(terms, spanTermQuery1.getTerm());
Result result = extractQueryTerms(spanTermQuery1);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_spanNearQuery() {
@ -275,48 +365,109 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
.addClause(spanTermQuery1).addClause(spanTermQuery2).build();
Set<Term> terms = extractQueryTerms(spanNearQuery);
assertTermsEqual(terms, spanTermQuery2.getTerm());
Result result = extractQueryTerms(spanNearQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery2.getTerm());
}
public void testExtractQueryMetadata_spanOrQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
Set<Term> terms = extractQueryTerms(spanOrQuery);
assertTermsEqual(terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
Result result = extractQueryTerms(spanOrQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
}
public void testExtractQueryMetadata_spanFirstQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
Set<Term> terms = extractQueryTerms(spanFirstQuery);
assertTermsEqual(terms, spanTermQuery1.getTerm());
Result result = extractQueryTerms(spanFirstQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_spanNotQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
Set<Term> terms = extractQueryTerms(spanNotQuery);
assertTermsEqual(terms, spanTermQuery1.getTerm());
Result result = extractQueryTerms(spanNotQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_matchNoDocsQuery() {
Set<Term> terms = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
assertEquals(0, terms.size());
Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
assertThat(result.verified, is(true));
assertEquals(0, result.terms.size());
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
terms = extractQueryTerms(bq.build());
assertEquals(0, terms.size());
result = extractQueryTerms(bq.build());
assertThat(result.verified, is(false));
assertEquals(0, result.terms.size());
bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
terms = extractQueryTerms(bq.build());
assertTermsEqual(terms, new Term("field", "value"));
result = extractQueryTerms(bq.build());
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
1f
);
result = extractQueryTerms(disjunctionMaxQuery);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
}
public void testExtractQueryMetadata_matchAllDocsQuery() {
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery()));
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
Result result = extractQueryTerms(builder.build());
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, new Term("field", "value"));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq3 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq3));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq4 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4));
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq5 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5));
}
public void testExtractQueryMetadata_unsupportedQuery() {
@ -343,8 +494,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
Set<Term> terms = extractQueryTerms(bq1);
assertTermsEqual(terms, termQuery1.getTerm());
Result result = extractQueryTerms(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery1.getTerm());
TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
builder = new BooleanQuery.Builder();
@ -352,8 +504,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(termQuery2, BooleanClause.Occur.MUST);
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
bq1 = builder.build();
terms = extractQueryTerms(bq1);
assertTermsEqual(terms, termQuery2.getTerm());
result = extractQueryTerms(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery2.getTerm());
builder = new BooleanQuery.Builder();
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
@ -372,7 +525,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
);
List<Term> terms = new ArrayList<>(extractQueryTerms(disjunctionMaxQuery));
Result result = extractQueryTerms(disjunctionMaxQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
assertThat(terms.get(1).field(), equalTo(termQuery2.getTerm().field()));
assertThat(terms.get(1).bytes(), equalTo(termQuery2.getTerm().bytes()));
assertThat(terms.get(2).field(), equalTo(termQuery3.getTerm().field()));
assertThat(terms.get(2).bytes(), equalTo(termQuery3.getTerm().bytes()));
assertThat(terms.get(3).field(), equalTo(termQuery4.getTerm().field()));
assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
);
result = extractQueryTerms(disjunctionMaxQuery);
assertThat(result.verified, is(false));
terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@ -394,7 +567,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
TermsQuery query = (TermsQuery)
createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD);
createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
PrefixCodedTerms terms = query.getTermData();
assertThat(terms.size(), equalTo(15L));
@ -413,7 +586,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "", UNKNOWN_QUERY_FIELD);
assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
}
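The assertions above reflect that the dedicated unknown-query marker is replaced by an extraction-result field: queries whose terms could not be extracted are indexed with the failed marker, and the candidate query always includes that marker term so such queries stay in the candidate set. A simplified sketch of the resulting query shape (the stand-in document terms are illustrative only):

    List<Term> candidateTerms = new ArrayList<>();
    for (String indexedTerm : Arrays.asList("field1\u0000brown", "field1\u0000fox")) { // stand-in data
        candidateTerms.add(new Term(QUERY_TERMS_FIELD, indexedTerm));
    }
    // keep queries whose extraction failed in the candidate set, otherwise they could be missed:
    candidateTerms.add(new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
    TermsQuery candidateQuery = new TermsQuery(candidateTerms);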
public void testSelectTermsListWithHighestSumOfTermLength() {


@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
@ -37,14 +38,21 @@ import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@ -52,6 +60,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.Uid;
@ -61,8 +70,11 @@ import org.junit.After;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static org.hamcrest.Matchers.arrayWithSize;
import static org.hamcrest.Matchers.equalTo;
@ -72,7 +84,7 @@ public class PercolateQueryTests extends ESTestCase {
public final static String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
public final static String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
public static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
public final static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
static {
EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false);
@ -247,34 +259,91 @@ public class PercolateQueryTests extends ESTestCase {
}
public void testDuel() throws Exception {
int numQueries = scaledRandomIntBetween(32, 256);
for (int i = 0; i < numQueries; i++) {
String id = Integer.toString(i);
Query query;
List<Function<String, Query>> queries = new ArrayList<>();
queries.add((id) -> new PrefixQuery(new Term("field", id)));
queries.add((id) -> new WildcardQuery(new Term("field", id + "*")));
queries.add((id) -> new CustomQuery(new Term("field", id)));
queries.add((id) -> new SpanTermQuery(new Term("field", id)));
queries.add((id) -> new TermQuery(new Term("field", id)));
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
if (randomBoolean()) {
query = new PrefixQuery(new Term("field", id));
} else if (randomBoolean()) {
query = new WildcardQuery(new Term("field", id + "*"));
} else if (randomBoolean()) {
query = new CustomQuery(new Term("field", id + "*"));
} else if (randomBoolean()) {
query = new SpanTermQuery(new Term("field", id));
} else {
query = new TermQuery(new Term("field", id));
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
addPercolatorQuery(id, query);
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
return builder.build();
});
queries.add((id) -> new MatchAllDocsQuery());
queries.add((id) -> new MatchNoDocsQuery("no reason at all"));
int numDocs = randomIntBetween(queries.size(), queries.size() * 3);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
addPercolatorQuery(id, queries.get(i % queries.size()).apply(id));
}
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable the query cache: ControlQuery's results depend on the external MemoryIndex, so they must not be cached per reader.
shardSearcher.setQueryCache(null);
for (int i = 0; i < numQueries; i++) {
MemoryIndex memoryIndex = new MemoryIndex();
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", id, new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
}
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
// Empty percolator doc:
memoryIndex = new MemoryIndex();
duelRun(memoryIndex, shardSearcher);
}
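Each duel round indexes a single one-field document into a fresh MemoryIndex. For reference, the MemoryIndex API the loop relies on is just (standard Lucene, shown with stand-in values):

    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "3", new WhitespaceAnalyzer());
    // search() returns a relevance score; 0f means the query does not match the document
    float score = memoryIndex.search(new TermQuery(new Term("field", "3")));
    assert score > 0f;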
public void testDuelSpecificQueries() throws Exception {
@ -312,6 +381,8 @@ public class PercolateQueryTests extends ESTestCase {
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable the query cache: ControlQuery's results depend on the external MemoryIndex, so they must not be cached per reader.
shardSearcher.setQueryCache(null);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
@ -332,33 +403,33 @@ public class PercolateQueryTests extends ESTestCase {
}
private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
boolean requireScore = randomBoolean();
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery.Builder builder1 = new PercolateQuery.Builder(
PercolateQuery.Builder builder = new PercolateQuery.Builder(
"docType",
queryStore,
new BytesArray("{}"),
percolateSearcher
);
// enables the optimization that prevents evaluation of queries that cannot match
builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10);
builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build());
TopDocs topDocs = shardSearcher.search(query, 10);
PercolateQuery.Builder builder2 = new PercolateQuery.Builder(
"docType",
queryStore,
new BytesArray("{}"),
percolateSearcher
);
builder2.setPercolateTypeQuery(new MatchAllDocsQuery());
TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10);
assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits));
assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length));
for (int j = 0; j < topDocs1.scoreDocs.length; j++) {
assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc));
assertThat(topDocs1.scoreDocs[j].score, equalTo(topDocs2.scoreDocs[j].score));
Explanation explain1 = shardSearcher.explain(builder1.build(), topDocs1.scoreDocs[j].doc);
Explanation explain2 = shardSearcher.explain(builder2.build(), topDocs2.scoreDocs[j].doc);
assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
Query controlQuery = new ControlQuery(memoryIndex, queryStore);
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
for (int j = 0; j < topDocs.scoreDocs.length; j++) {
assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
if (requireScore) {
Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
assertThat(explain1.getValue(), equalTo(explain2.getValue()));
}
}
}
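The comparison above is the usual duel idiom: run the optimized PercolateQuery and a brute-force control (ControlQuery, defined below) and require identical hits. Distilled into a hypothetical helper (not part of this commit; assertEquals from JUnit):

    static void assertSameTopDocs(IndexSearcher searcher, Query optimized, Query control) throws IOException {
        TopDocs expected = searcher.search(control, 10);
        TopDocs actual = searcher.search(optimized, 10);
        assertEquals(expected.totalHits, actual.totalHits);
        assertEquals(expected.scoreDocs.length, actual.scoreDocs.length);
        for (int i = 0; i < expected.scoreDocs.length; i++) {
            assertEquals(expected.scoreDocs[i].doc, actual.scoreDocs[i].doc);
            assertEquals(expected.scoreDocs[i].score, actual.scoreDocs[i].score, 0f);
        }
    }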
@ -391,4 +462,89 @@ public class PercolateQueryTests extends ESTestCase {
}
}
private final static class ControlQuery extends Query {
private final MemoryIndex memoryIndex;
private final PercolateQuery.QueryStore queryStore;
private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
this.memoryIndex = memoryIndex;
this.queryStore = queryStore;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
return new ConstantScoreWeight(this) {
float _score;
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
int result = scorer.iterator().advance(doc);
if (result == doc) {
return Explanation.match(scorer.score(), "ControlQuery");
}
}
return Explanation.noMatch("ControlQuery");
}
@Override
public String toString() {
return "weight(" + ControlQuery.this + ")";
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
@Override
protected boolean match(int doc) {
try {
Query query = leaf.getQuery(doc);
float score = memoryIndex.search(query);
if (score != 0f) {
if (needsScores) {
_score = score;
}
return true;
} else {
return false;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
@Override
public float score() throws IOException {
return _score;
}
};
}
};
}
@Override
public String toString(String field) {
return "control{" + field + "}";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return classHash();
}
}
}


@ -49,6 +49,8 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
@ -71,7 +73,9 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
indexService = createIndex("test", Settings.EMPTY);
mapperService = indexService.mapperService();
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("field").field("type", "text").endObject()
.startObject("number_field").field("type", "long").endObject()
.startObject("date_field").field("type", "date").endObject()
@ -96,20 +100,21 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field(fieldName, queryBuilder)
.endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);
// add a query from which we don't extract terms
queryBuilder = matchAllQuery();
queryBuilder = rangeQuery("field").from("a").to("z");
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
.field(fieldName, queryBuilder)
.endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName())[0].binaryValue(), equalTo(new BytesRef()));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
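The two marker values asserted in this test cover the two extraction outcomes. A sketch of the plausible mapper-side control flow (field names and exact handling are assumptions; only the complete/failed outcomes are confirmed by this test):

    String extractionResult;
    try {
        Result result = extractQueryTerms(query);    // terms go into the extracted-terms field
        extractionResult = EXTRACTION_COMPLETE;
    } catch (UnsupportedQueryException e) {
        extractionResult = EXTRACTION_FAILED;        // query remains a candidate via the marker term
    }
    // indexed alongside the serialized query builder, e.g.:
    // doc.add(new Field(extractionResultFieldName, extractionResult, fieldType));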
@ -195,6 +200,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
public void testMultiplePercolatorFields() throws Exception {
String typeName = "another_type";
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("query_field1").field("type", "percolator").endObject()
.startObject("query_field2").field("type", "percolator").endObject()
@ -209,7 +215,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field("query_field2", queryBuilder)
.endObject().bytes()
);
assertThat(doc.rootDoc().getFields().size(), equalTo(22)); // also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes _uid (1), type (2), source (1)
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
@ -221,6 +227,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
public void testNestedPercolatorField() throws Exception {
String typeName = "another_type";
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("object_field")
.field("type", "object")
@ -238,7 +245,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field("query_field", queryBuilder)
.endObject().endObject().bytes()
);
assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
@ -249,7 +256,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.endArray()
.endObject().bytes()
);
assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);


@ -397,12 +397,16 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
.addMapping("employee", mapping)
.addMapping("queries", "query", "type=percolator")
);
client().prepareIndex("test", "queries", "q").setSource(jsonBuilder().startObject()
client().prepareIndex("test", "queries", "q1").setSource(jsonBuilder().startObject()
.field("query", QueryBuilders.nestedQuery("employee",
QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
).endObject())
.setRefreshPolicy(IMMEDIATE)
.get();
// this query should never match as it doesn't use a nested query:
client().prepareIndex("test", "queries", "q2").setSource(jsonBuilder().startObject()
.field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
.get();
client().admin().indices().prepareRefresh().get();
SearchResponse response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "employee",
@ -413,9 +417,10 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
.startObject().field("name", "tony stark").endObject()
.endArray()
.endObject().bytes()))
.addSort("_doc", SortOrder.ASC)
.get();
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("q"));
assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "employee",
@ -426,12 +431,14 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
.startObject().field("name", "tony stark").endObject()
.endArray()
.endObject().bytes()))
.addSort("_doc", SortOrder.ASC)
.get();
assertHitCount(response, 0);
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "employee",
XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject().bytes()))
.addSort("_doc", SortOrder.ASC)
.get();
assertHitCount(response, 0);
}