percolator: Don't verify candidate matches with MemoryIndex that are verified matches

If we don't care about scoring, then for certain candidate matches we can be certain that they will always match.
So verifying these queries with the MemoryIndex can be skipped.
Martijn van Groningen 2016-06-02 12:53:05 +02:00
parent 6d5b4a78fe
commit 599a548998
9 changed files with 799 additions and 247 deletions
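The idea in this commit, as a minimal standalone sketch (CandidateMatcher, Candidate and memoryIndexCheck below are hypothetical illustration names, not code from this commit): at index time each percolator query is marked complete, partial or failed depending on how much of it could be turned into extracted terms, and at percolate time a candidate whose extraction was complete is by construction a match, so only the remaining candidates pay for the MemoryIndex verification.

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;

// Hypothetical model of the optimization: "verified" candidates skip the
// expensive verification step, unverified ones still run it.
final class CandidateMatcher {

    static final class Candidate {
        final String id;
        final boolean verified;                    // extraction_result == "complete"
        final Predicate<String> memoryIndexCheck;  // stand-in for the real MemoryIndex query

        Candidate(String id, boolean verified, Predicate<String> memoryIndexCheck) {
            this.id = id;
            this.verified = verified;
            this.memoryIndexCheck = memoryIndexCheck;
        }
    }

    static boolean matches(Candidate candidate, String document) {
        if (candidate.verified) {
            // A verified candidate is a guaranteed match: no verification needed.
            return true;
        }
        return candidate.memoryIndexCheck.test(document);
    }

    public static void main(String[] args) {
        List<Candidate> candidates = Arrays.asList(
            new Candidate("q1", true, doc -> { throw new AssertionError("never invoked"); }),
            new Candidate("q2", false, doc -> doc.contains("quick brown fox")));
        for (Candidate c : candidates) {
            System.out.println(c.id + " matches: " + matches(c, "the quick brown fox"));
        }
    }
}

In the commit itself the verified flag is the extraction_result:complete term written by ExtractQueryTermsService, and the skip happens in PercolateQuery.matchDocId via a sequential-access bitset built from a TermQuery on that field.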

LuceneTests.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.common.lucene;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
@@ -27,6 +28,8 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoDeletionPolicy;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -35,9 +38,11 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MMapDirectory;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.test.ESTestCase;
 import java.io.IOException;
@@ -49,9 +54,6 @@ import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
-/**
- *
- */
 public class LuceneTests extends ESTestCase {
     public void testWaitForIndex() throws Exception {
         final MockDirectoryWrapper dir = newMockDirectory();
@@ -355,6 +357,45 @@ public class LuceneTests extends ESTestCase {
         dir.close();
     }
+    public void testAsSequentialAccessBits() throws Exception {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
+        Document doc = new Document();
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+        doc = new Document();
+        w.addDocument(doc);
+        doc = new Document();
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+        try (DirectoryReader reader = DirectoryReader.open(w)) {
+            IndexSearcher searcher = newSearcher(reader);
+            Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false);
+            assertEquals(1, reader.leaves().size());
+            LeafReaderContext leafReaderContext = reader.leaves().get(0);
+            Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext));
+            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1));
+            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc()));
+            assertTrue(bits.get(0));
+            assertTrue(bits.get(0));
+            assertFalse(bits.get(1));
+            assertFalse(bits.get(1));
+            expectThrows(IllegalArgumentException.class, () -> bits.get(0));
+            assertTrue(bits.get(2));
+            assertTrue(bits.get(2));
+            expectThrows(IllegalArgumentException.class, () -> bits.get(1));
+        }
+        w.close();
+        dir.close();
+    }
     /**
      * Test that the "unmap hack" is detected as supported by lucene.
      * This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038

ExtractQueryTermsService.java

@@ -53,10 +53,13 @@ import org.elasticsearch.index.mapper.ParseContext;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.function.Function;
 /**
  * Utility to extract query terms from queries and create queries from documents.
@@ -64,64 +67,179 @@ import java.util.Set;
 public final class ExtractQueryTermsService {
     private static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
+    public static final String EXTRACTION_COMPLETE = "complete";
+    public static final String EXTRACTION_PARTIAL = "partial";
+    public static final String EXTRACTION_FAILED = "failed";
+    static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
+    static {
+        Map<Class<? extends Query>, Function<Query, Result>> map = new HashMap<>(16);
+        map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
+        map.put(ConstantScoreQuery.class, constantScoreQuery());
+        map.put(BoostQuery.class, boostQuery());
+        map.put(TermQuery.class, termQuery());
+        map.put(TermsQuery.class, termsQuery());
+        map.put(CommonTermsQuery.class, commonTermsQuery());
+        map.put(BlendedTermQuery.class, blendedTermQuery());
+        map.put(PhraseQuery.class, phraseQuery());
+        map.put(SpanTermQuery.class, spanTermQuery());
+        map.put(SpanNearQuery.class, spanNearQuery());
+        map.put(SpanOrQuery.class, spanOrQuery());
+        map.put(SpanFirstQuery.class, spanFirstQuery());
+        map.put(SpanNotQuery.class, spanNotQuery());
+        map.put(BooleanQuery.class, booleanQuery());
+        map.put(DisjunctionMaxQuery.class, disjunctionMaxQuery());
+        queryProcessors = Collections.unmodifiableMap(map);
+    }
     private ExtractQueryTermsService() {
     }
     /**
      * Extracts all terms from the specified query and adds it to the specified document.
-     *
      * @param query The query to extract terms from
      * @param document The document to add the extracted terms to
      * @param queryTermsFieldField The field in the document holding the extracted terms
-     * @param unknownQueryField The field used to mark a document that not all query terms could be extracted.
-     *                          For example the query contained an unsupported query (e.g. WildcardQuery).
-     * @param fieldType The field type for the query metadata field
+     * @param extractionResultField The field contains whether query term extraction was successful, partial or
+     *                              failed. (For example the query contained an unsupported query (e.g. WildcardQuery)
+     *                              then query extraction would fail)
+     * @param fieldType The field type for the query metadata field
      */
     public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField,
-                                         String unknownQueryField, FieldType fieldType) {
-        Set<Term> queryTerms;
+                                         String extractionResultField, FieldType fieldType) {
+        Result result;
         try {
-            queryTerms = extractQueryTerms(query);
+            result = extractQueryTerms(query);
         } catch (UnsupportedQueryException e) {
-            document.add(new Field(unknownQueryField, new BytesRef(), fieldType));
+            document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType));
             return;
         }
-        for (Term term : queryTerms) {
+        for (Term term : result.terms) {
             BytesRefBuilder builder = new BytesRefBuilder();
             builder.append(new BytesRef(term.field()));
             builder.append(FIELD_VALUE_SEPARATOR);
             builder.append(term.bytes());
             document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType));
         }
+        if (result.verified) {
+            document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType));
+        } else {
+            document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType));
+        }
+    }
+    /**
+     * Creates a terms query containing all terms from all fields of the specified index reader.
+     */
+    public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
+                                              Term... optionalTerms) throws IOException {
+        Objects.requireNonNull(queryMetadataField);
+        List<Term> extractedTerms = new ArrayList<>();
+        Collections.addAll(extractedTerms, optionalTerms);
+        Fields fields = MultiFields.getFields(indexReader);
+        for (String field : fields) {
+            Terms terms = fields.terms(field);
+            if (terms == null) {
+                continue;
+            }
+            BytesRef fieldBr = new BytesRef(field);
+            TermsEnum tenum = terms.iterator();
+            for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
+                BytesRefBuilder builder = new BytesRefBuilder();
+                builder.append(fieldBr);
+                builder.append(FIELD_VALUE_SEPARATOR);
+                builder.append(term);
+                extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
+            }
+        }
+        return new TermsQuery(extractedTerms);
     }
     /**
      * Extracts all query terms from the provided query and adds it to specified list.
-     *
+     * <p>
      * From boolean query with no should clauses or phrase queries only the longest term are selected,
      * since that those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored.
-     *
+     * <p>
      * If from part of the query, no query terms can be extracted then term extraction is stopped and
      * an UnsupportedQueryException is thrown.
      */
-    static Set<Term> extractQueryTerms(Query query) {
-        if (query instanceof MatchNoDocsQuery) {
-            // no terms to extract as this query matches no docs
-            return Collections.emptySet();
-        } else if (query instanceof TermQuery) {
-            return Collections.singleton(((TermQuery) query).getTerm());
-        } else if (query instanceof TermsQuery) {
-            Set<Term> terms = new HashSet<>();
+    static Result extractQueryTerms(Query query) {
+        Class queryClass = query.getClass();
+        if (queryClass.isAnonymousClass()) {
+            // Sometimes queries have anonymous classes in that case we need the direct super class.
+            // (for example blended term query)
+            queryClass = queryClass.getSuperclass();
+        }
+        Function<Query, Result> queryProcessor = queryProcessors.get(queryClass);
+        if (queryProcessor != null) {
+            return queryProcessor.apply(query);
+        } else {
+            throw new UnsupportedQueryException(query);
+        }
+    }
+    static Function<Query, Result> matchNoDocsQuery() {
+        return (query -> new Result(true, Collections.emptySet()));
+    }
+    static Function<Query, Result> constantScoreQuery() {
+        return query -> {
+            Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
+            return extractQueryTerms(wrappedQuery);
+        };
+    }
+    static Function<Query, Result> boostQuery() {
+        return query -> {
+            Query wrappedQuery = ((BoostQuery) query).getQuery();
+            return extractQueryTerms(wrappedQuery);
+        };
+    }
+    static Function<Query, Result> termQuery() {
+        return (query -> {
+            TermQuery termQuery = (TermQuery) query;
+            return new Result(true, Collections.singleton(termQuery.getTerm()));
+        });
+    }
+    static Function<Query, Result> termsQuery() {
+        return query -> {
             TermsQuery termsQuery = (TermsQuery) query;
+            Set<Term> terms = new HashSet<>();
             PrefixCodedTerms.TermIterator iterator = termsQuery.getTermData().iterator();
             for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                 terms.add(new Term(iterator.field(), term));
             }
-            return terms;
-        } else if (query instanceof PhraseQuery) {
+            return new Result(true, terms);
+        };
+    }
+    static Function<Query, Result> commonTermsQuery() {
+        return query -> {
+            List<Term> terms = ((CommonTermsQuery) query).getTerms();
+            return new Result(false, new HashSet<>(terms));
+        };
+    }
+    static Function<Query, Result> blendedTermQuery() {
+        return query -> {
+            List<Term> terms = ((BlendedTermQuery) query).getTerms();
+            return new Result(true, new HashSet<>(terms));
+        };
+    }
+    static Function<Query, Result> phraseQuery() {
+        return query -> {
             Term[] terms = ((PhraseQuery) query).getTerms();
             if (terms.length == 0) {
-                return Collections.emptySet();
+                return new Result(true, Collections.emptySet());
             }
             // the longest term is likely to be the rarest,
@@ -132,19 +250,76 @@ public final class ExtractQueryTermsService {
                     longestTerm = term;
                 }
             }
-            return Collections.singleton(longestTerm);
-        } else if (query instanceof BooleanQuery) {
-            List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
-            boolean hasRequiredClauses = false;
+            return new Result(false, Collections.singleton(longestTerm));
+        };
+    }
+    static Function<Query, Result> spanTermQuery() {
+        return query -> {
+            Term term = ((SpanTermQuery) query).getTerm();
+            return new Result(true, Collections.singleton(term));
+        };
+    }
+    static Function<Query, Result> spanNearQuery() {
+        return query -> {
+            Set<Term> bestClauses = null;
+            SpanNearQuery spanNearQuery = (SpanNearQuery) query;
+            for (SpanQuery clause : spanNearQuery.getClauses()) {
+                Result temp = extractQueryTerms(clause);
+                bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
+            }
+            return new Result(false, bestClauses);
+        };
+    }
+    static Function<Query, Result> spanOrQuery() {
+        return query -> {
+            Set<Term> terms = new HashSet<>();
+            SpanOrQuery spanOrQuery = (SpanOrQuery) query;
+            for (SpanQuery clause : spanOrQuery.getClauses()) {
+                terms.addAll(extractQueryTerms(clause).terms);
+            }
+            return new Result(false, terms);
+        };
+    }
+    static Function<Query, Result> spanNotQuery() {
+        return query -> {
+            Result result = extractQueryTerms(((SpanNotQuery) query).getInclude());
+            return new Result(false, result.terms);
+        };
+    }
+    static Function<Query, Result> spanFirstQuery() {
+        return query -> {
+            Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch());
+            return new Result(false, result.terms);
+        };
+    }
+    static Function<Query, Result> booleanQuery() {
+        return query -> {
+            BooleanQuery bq = (BooleanQuery) query;
+            List<BooleanClause> clauses = bq.clauses();
+            int minimumShouldMatch = bq.getMinimumNumberShouldMatch();
+            int numRequiredClauses = 0;
+            int numOptionalClauses = 0;
+            int numProhibitedClauses = 0;
             for (BooleanClause clause : clauses) {
                 if (clause.isRequired()) {
-                    hasRequiredClauses = true;
-                    break;
+                    numRequiredClauses++;
                 }
+                if (clause.isProhibited()) {
+                    numProhibitedClauses++;
+                }
+                if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
+                    numOptionalClauses++;
+                }
             }
-            if (hasRequiredClauses) {
-                UnsupportedQueryException uqe = null;
+            if (numRequiredClauses > 0) {
                 Set<Term> bestClause = null;
+                UnsupportedQueryException uqe = null;
                 for (BooleanClause clause : clauses) {
                     if (clause.isRequired() == false) {
                         // skip must_not clauses, we don't need to remember the things that do *not* match...
@@ -153,77 +328,56 @@ public final class ExtractQueryTermsService {
                         continue;
                     }
-                    Set<Term> temp;
+                    Result temp;
                     try {
                         temp = extractQueryTerms(clause.getQuery());
                     } catch (UnsupportedQueryException e) {
                         uqe = e;
                         continue;
                     }
-                    bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
+                    bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause);
                 }
                 if (bestClause != null) {
-                    return bestClause;
+                    return new Result(false, bestClause);
                 } else {
                     if (uqe != null) {
+                        // we're unable to select the best clause and an exception occurred, so we bail
                         throw uqe;
+                    } else {
+                        // We didn't find a clause and no exception occurred, so this bq only contained MatchNoDocsQueries,
+                        return new Result(true, Collections.emptySet());
                     }
-                    return Collections.emptySet();
                 }
             } else {
-                Set<Term> terms = new HashSet<>();
+                List<Query> disjunctions = new ArrayList<>(numOptionalClauses);
                 for (BooleanClause clause : clauses) {
-                    if (clause.isProhibited()) {
-                        // we don't need to remember the things that do *not* match...
-                        continue;
+                    if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
+                        disjunctions.add(clause.getQuery());
                     }
-                    terms.addAll(extractQueryTerms(clause.getQuery()));
                 }
-                return terms;
+                return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0);
             }
-        } else if (query instanceof ConstantScoreQuery) {
-            Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
-            return extractQueryTerms(wrappedQuery);
-        } else if (query instanceof BoostQuery) {
-            Query wrappedQuery = ((BoostQuery) query).getQuery();
-            return extractQueryTerms(wrappedQuery);
-        } else if (query instanceof CommonTermsQuery) {
-            List<Term> terms = ((CommonTermsQuery) query).getTerms();
-            return new HashSet<>(terms);
-        } else if (query instanceof BlendedTermQuery) {
-            List<Term> terms = ((BlendedTermQuery) query).getTerms();
-            return new HashSet<>(terms);
-        } else if (query instanceof DisjunctionMaxQuery) {
+        };
+    }
+    static Function<Query, Result> disjunctionMaxQuery() {
+        return query -> {
             List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
-            Set<Term> terms = new HashSet<>();
-            for (Query disjunct : disjuncts) {
-                terms.addAll(extractQueryTerms(disjunct));
+            return handleDisjunction(disjuncts, 1, false);
+        };
+    }
+    static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
+        boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
+        Set<Term> terms = new HashSet<>();
+        for (Query disjunct : disjunctions) {
+            Result subResult = extractQueryTerms(disjunct);
+            if (subResult.verified == false) {
+                verified = false;
             }
-            return terms;
-        } else if (query instanceof SpanTermQuery) {
-            return Collections.singleton(((SpanTermQuery) query).getTerm());
-        } else if (query instanceof SpanNearQuery) {
-            Set<Term> bestClause = null;
-            SpanNearQuery spanNearQuery = (SpanNearQuery) query;
-            for (SpanQuery clause : spanNearQuery.getClauses()) {
-                Set<Term> temp = extractQueryTerms(clause);
-                bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
-            }
-            return bestClause;
-        } else if (query instanceof SpanOrQuery) {
-            Set<Term> terms = new HashSet<>();
-            SpanOrQuery spanOrQuery = (SpanOrQuery) query;
-            for (SpanQuery clause : spanOrQuery.getClauses()) {
-                terms.addAll(extractQueryTerms(clause));
-            }
-            return terms;
-        } else if (query instanceof SpanFirstQuery) {
-            return extractQueryTerms(((SpanFirstQuery)query).getMatch());
-        } else if (query instanceof SpanNotQuery) {
-            return extractQueryTerms(((SpanNotQuery) query).getInclude());
-        } else {
-            throw new UnsupportedQueryException(query);
+            terms.addAll(subResult.terms);
         }
+        return new Result(verified, terms);
     }
     static Set<Term> selectTermListWithTheLongestShortestTerm(Set<Term> terms1, Set<Term> terms2) {
@@ -243,7 +397,7 @@ public final class ExtractQueryTermsService {
         }
     }
-    private static int minTermLength(Set<Term> terms) {
+    static int minTermLength(Set<Term> terms) {
         int min = Integer.MAX_VALUE;
         for (Term term : terms) {
             min = Math.min(min, term.bytes().length);
@@ -251,40 +405,22 @@ public final class ExtractQueryTermsService {
         return min;
     }
-    /**
-     * Creates a boolean query with a should clause for each term on all fields of the specified index reader.
-     */
-    public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
-                                              String unknownQueryField) throws IOException {
-        Objects.requireNonNull(queryMetadataField);
-        Objects.requireNonNull(unknownQueryField);
-        List<Term> extractedTerms = new ArrayList<>();
-        extractedTerms.add(new Term(unknownQueryField));
-        Fields fields = MultiFields.getFields(indexReader);
-        for (String field : fields) {
-            Terms terms = fields.terms(field);
-            if (terms == null) {
-                continue;
-            }
-            BytesRef fieldBr = new BytesRef(field);
-            TermsEnum tenum = terms.iterator();
-            for (BytesRef term = tenum.next(); term != null ; term = tenum.next()) {
-                BytesRefBuilder builder = new BytesRefBuilder();
-                builder.append(fieldBr);
-                builder.append(FIELD_VALUE_SEPARATOR);
-                builder.append(term);
-                extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
-            }
+    static class Result {
+        final Set<Term> terms;
+        final boolean verified;
+        Result(boolean verified, Set<Term> terms) {
+            this.terms = terms;
+            this.verified = verified;
         }
-        return new TermsQuery(extractedTerms);
     }
     /**
      * Exception indicating that none or some query terms couldn't extracted from a percolator query.
      */
-    public static class UnsupportedQueryException extends RuntimeException {
+    static class UnsupportedQueryException extends RuntimeException {
         private final Query unsupportedQuery;

PercolateQuery.java

@@ -28,12 +28,15 @@ import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import java.io.IOException;
 import java.util.Objects;
@@ -54,6 +57,7 @@ public final class PercolateQuery extends Query implements Accountable {
         private final IndexSearcher percolatorIndexSearcher;
         private Query queriesMetaDataQuery;
+        private Query verifiedQueriesQuery = new MatchNoDocsQuery("");
         private Query percolateTypeQuery;
         /**
@@ -64,21 +68,32 @@ public final class PercolateQuery extends Query implements Accountable {
          */
         public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) {
             this.docType = Objects.requireNonNull(docType);
-            this.queryStore = Objects.requireNonNull(queryStore);
             this.documentSource = Objects.requireNonNull(documentSource);
             this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
+            this.queryStore = Objects.requireNonNull(queryStore);
         }
         /**
          * Optionally sets a query that reduces the number of queries to percolate based on extracted terms from
          * the document to be percolated.
-         *
-         * @param extractedTermsFieldName The name of the field to get the extracted terms from
-         * @param unknownQueryFieldname The field used to mark documents whose queries couldn't all get extracted
+         * @param extractedTermsFieldName The name of the field to get the extracted terms from
+         * @param extractionResultField The field to indicate for a document whether query term extraction was complete,
+         *                              partial or failed. If query extraction was complete, the MemoryIndex doesn't
          */
-        public void extractQueryTermsQuery(String extractedTermsFieldName, String unknownQueryFieldname) throws IOException {
+        public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException {
+            // We can only skip the MemoryIndex verification when percolating a single document.
+            // When the document being percolated contains a nested object field then the MemoryIndex contains multiple
+            // documents. In this case the term query that indicates whether memory index verification can be skipped
+            // can incorrectly indicate that non nested queries would match, while their nested variants would not.
+            if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) {
+                this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE));
+            }
             this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery(
-                percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName, unknownQueryFieldname
+                percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName,
+                // include extractionResultField:failed, because docs with this term have no extractedTermsField
+                // and otherwise we would fail to return these docs. Docs that failed query term extraction
+                // always need to be verified by MemoryIndex:
+                new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED)
             );
         }
@@ -94,14 +109,15 @@ public final class PercolateQuery extends Query implements Accountable {
                 throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata");
             }
             // The query that selects which percolator queries will be evaluated by MemoryIndex:
-            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder();
             if (percolateTypeQuery != null) {
-                builder.add(percolateTypeQuery, FILTER);
+                queriesQuery.add(percolateTypeQuery, FILTER);
             }
             if (queriesMetaDataQuery != null) {
-                builder.add(queriesMetaDataQuery, FILTER);
+                queriesQuery.add(queriesMetaDataQuery, FILTER);
             }
-            return new PercolateQuery(docType, queryStore, documentSource, builder.build(), percolatorIndexSearcher);
+            return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher,
+                verifiedQueriesQuery);
         }
     }
@@ -110,22 +126,25 @@ public final class PercolateQuery extends Query implements Accountable {
     private final QueryStore queryStore;
     private final BytesReference documentSource;
     private final Query percolatorQueriesQuery;
+    private final Query verifiedQueriesQuery;
     private final IndexSearcher percolatorIndexSearcher;
     private PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
-                           Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher) {
+                           Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) {
         this.documentType = documentType;
         this.documentSource = documentSource;
         this.percolatorQueriesQuery = percolatorQueriesQuery;
         this.queryStore = queryStore;
         this.percolatorIndexSearcher = percolatorIndexSearcher;
+        this.verifiedQueriesQuery = verifiedQueriesQuery;
     }
     @Override
     public Query rewrite(IndexReader reader) throws IOException {
         Query rewritten = percolatorQueriesQuery.rewrite(reader);
         if (rewritten != percolatorQueriesQuery) {
-            return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher);
+            return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher,
+                verifiedQueriesQuery);
         } else {
             return this;
         }
@@ -133,6 +152,7 @@ public final class PercolateQuery extends Query implements Accountable {
     @Override
     public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+        final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false);
         final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores);
         return new Weight(this) {
             @Override
@@ -206,6 +226,8 @@ public final class PercolateQuery extends Query implements Accountable {
                     }
                 };
             } else {
+                Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext);
+                Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
                 return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {
                     @Override
@@ -214,6 +236,14 @@ public final class PercolateQuery extends Query implements Accountable {
                     }
                     boolean matchDocId(int docId) throws IOException {
+                        // We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
+                        // If docId also appears in the verifiedDocsBits then that means during indexing
+                        // we were able to extract all query terms and for this candidate match
+                        // and we determined based on the nature of the query that it is safe to skip
+                        // the MemoryIndex verification.
+                        if (verifiedDocsBits.get(docId)) {
+                            return true;
+                        }
                         Query query = percolatorQueries.getQuery(docId);
                         return query != null && Lucene.exists(percolatorIndexSearcher, query);
                     }

PercolateQueryBuilder.java

@@ -420,10 +420,11 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
                     "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
             }
             PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
+            PercolateQuery.QueryStore queryStore = createStore(pft, context, mapUnmappedFieldsAsString);
             PercolateQuery.Builder builder = new PercolateQuery.Builder(
-                documentType, createStore(pft, context, mapUnmappedFieldsAsString), document, docSearcher
+                documentType, queryStore, document, docSearcher
             );
-            builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
+            builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getExtractionResultFieldName());
             return builder.build();
         } else {
             Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, MapperService.PERCOLATOR_LEGACY_TYPE_NAME));

PercolatorFieldMapper.java

@@ -58,7 +58,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType();
     public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
-    public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
+    public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
     public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
     public static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
@@ -75,15 +75,15 @@ public class PercolatorFieldMapper extends FieldMapper {
             context.path().add(name());
             KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
             ((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType();
-            KeywordFieldMapper unknownQueryField = createExtractQueryFieldBuilder(UNKNOWN_QUERY_FIELD_NAME, context);
-            ((PercolatorFieldType) fieldType).unknownQueryField = unknownQueryField.fieldType();
+            KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
+            ((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType();
             BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
             ((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType();
             context.path().remove();
             setupFieldType(context);
             return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
                 multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
-                unknownQueryField, queryBuilderField);
+                extractionResultField, queryBuilderField);
         }
         static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
@@ -102,6 +102,7 @@ public class PercolatorFieldMapper extends FieldMapper {
             builder.fieldType().setDocValuesType(DocValuesType.BINARY);
             return builder.build(context);
         }
+
     }
     public static class TypeParser implements FieldMapper.TypeParser {
@@ -115,7 +116,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     public static class PercolatorFieldType extends MappedFieldType {
         private MappedFieldType queryTermsField;
-        private MappedFieldType unknownQueryField;
+        private MappedFieldType extractionResultField;
         private MappedFieldType queryBuilderField;
         public PercolatorFieldType() {
@@ -127,7 +128,7 @@ public class PercolatorFieldMapper extends FieldMapper {
         public PercolatorFieldType(PercolatorFieldType ref) {
             super(ref);
             queryTermsField = ref.queryTermsField;
-            unknownQueryField = ref.unknownQueryField;
+            extractionResultField = ref.extractionResultField;
             queryBuilderField = ref.queryBuilderField;
         }
@@ -135,8 +136,8 @@ public class PercolatorFieldMapper extends FieldMapper {
             return queryTermsField.name();
         }
-        public String getUnknownQueryFieldName() {
-            return unknownQueryField.name();
+        public String getExtractionResultFieldName() {
+            return extractionResultField.name();
         }
         public String getQueryBuilderFieldName() {
@@ -162,17 +163,17 @@ public class PercolatorFieldMapper extends FieldMapper {
     private final boolean mapUnmappedFieldAsString;
     private final QueryShardContext queryShardContext;
     private KeywordFieldMapper queryTermsField;
-    private KeywordFieldMapper unknownQueryField;
+    private KeywordFieldMapper extractionResultField;
     private BinaryFieldMapper queryBuilderField;
     public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                  Settings indexSettings, MultiFields multiFields, CopyTo copyTo, QueryShardContext queryShardContext,
-                                 KeywordFieldMapper queryTermsField, KeywordFieldMapper unknownQueryField,
+                                 KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
                                  BinaryFieldMapper queryBuilderField) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         this.queryShardContext = queryShardContext;
         this.queryTermsField = queryTermsField;
-        this.unknownQueryField = unknownQueryField;
+        this.extractionResultField = extractionResultField;
         this.queryBuilderField = queryBuilderField;
         this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
     }
@@ -181,18 +182,18 @@ public class PercolatorFieldMapper extends FieldMapper {
     public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
         PercolatorFieldMapper updated = (PercolatorFieldMapper) super.updateFieldType(fullNameToFieldType);
         KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
-        KeywordFieldMapper unknownQueryUpdated = (KeywordFieldMapper) unknownQueryField.updateFieldType(fullNameToFieldType);
+        KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
         BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
-        if (updated == this || queryTermsUpdated == queryTermsField || unknownQueryUpdated == unknownQueryField
-                || queryBuilderUpdated == queryBuilderField) {
+        if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
+                && queryBuilderUpdated == queryBuilderField) {
             return this;
         }
         if (updated == this) {
             updated = (PercolatorFieldMapper) updated.clone();
         }
         updated.queryTermsField = queryTermsUpdated;
-        updated.unknownQueryField = unknownQueryUpdated;
+        updated.extractionResultField = extractionResultUpdated;
         updated.queryBuilderField = queryBuilderUpdated;
         return updated;
     }
@@ -220,7 +221,7 @@ public class PercolatorFieldMapper extends FieldMapper {
         }
         Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder);
-        ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), unknownQueryField.name(),
+        ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(),
             queryTermsField.fieldType());
         return null;
     }
@@ -258,7 +259,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     @Override
     public Iterator<Mapper> iterator() {
-        return Arrays.<Mapper>asList(queryTermsField, unknownQueryField, queryBuilderField).iterator();
+        return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField).iterator();
     }
     @Override

ExtractQueryTermsServiceTests.java

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.index.memory.MemoryIndex;
@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
@ -44,6 +46,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.percolator.ExtractQueryTermsService.Result;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
@ -54,18 +57,22 @@ import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL;
import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException; import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms; import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery; import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm; import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance; import static org.hamcrest.Matchers.sameInstance;
public class ExtractQueryTermsServiceTests extends ESTestCase { public class ExtractQueryTermsServiceTests extends ESTestCase {
public final static String QUERY_TERMS_FIELD = "extracted_terms"; public final static String QUERY_TERMS_FIELD = "extracted_terms";
public final static String UNKNOWN_QUERY_FIELD = "unknown_query"; public final static String EXTRACTION_RESULT_FIELD = "extraction_result";
public static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType(); public final static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
static { static {
QUERY_TERMS_FIELD_TYPE.setTokenized(false); QUERY_TERMS_FIELD_TYPE.setTokenized(false);
@ -81,33 +88,41 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
bq.add(termQuery2, BooleanClause.Occur.SHOULD); bq.add(termQuery2, BooleanClause.Occur.SHOULD);
ParseContext.Document document = new ParseContext.Document(); ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE); extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
Collections.sort(document.getFields(), (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue())); assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE));
assertThat(document.getFields().size(), equalTo(2)); List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD)));
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD)); Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1")); assertThat(fields.size(), equalTo(2));
assertThat(document.getFields().get(1).name(), equalTo(QUERY_TERMS_FIELD)); assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(document.getFields().get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2")); assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
} }
public void testExtractQueryMetadata_unsupported() { public void testExtractQueryMetadata_unsupported() {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("field1", "term1"));
bq.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("field2", "term2"));
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true); TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
ParseContext.Document document = new ParseContext.Document(); ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(query, document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE); extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getFields().size(), equalTo(1)); assertThat(document.getFields().size(), equalTo(1));
assertThat(document.getFields().get(0).name(), equalTo(UNKNOWN_QUERY_FIELD)); assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("")); }
public void testExtractQueryMetadata_notVerified() {
PhraseQuery phraseQuery = new PhraseQuery("field", "term");
ParseContext.Document document = new ParseContext.Document();
extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
assertThat(document.getFields().size(), equalTo(2));
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL));
} }
public void testExtractQueryMetadata_termQuery() { public void testExtractQueryMetadata_termQuery() {
TermQuery termQuery = new TermQuery(new Term("_field", "_term")); TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
List<Term> terms = new ArrayList<>(extractQueryTerms(termQuery)); Result result = extractQueryTerms(termQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1)); assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field())); assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes())); assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
@ -115,7 +130,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_termsQuery() { public void testExtractQueryMetadata_termsQuery() {
TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2")); TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
List<Term> terms = new ArrayList<>(extractQueryTerms(termsQuery)); Result result = extractQueryTerms(termsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms); Collections.sort(terms);
assertThat(terms.size(), equalTo(2)); assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field")); assertThat(terms.get(0).field(), equalTo("_field"));
@ -125,7 +142,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
// test with different fields // test with different fields
termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2")); termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
terms = new ArrayList<>(extractQueryTerms(termsQuery)); result = extractQueryTerms(termsQuery);
assertThat(result.verified, is(true));
terms = new ArrayList<>(result.terms);
Collections.sort(terms); Collections.sort(terms);
assertThat(terms.size(), equalTo(2)); assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field1")); assertThat(terms.get(0).field(), equalTo("_field1"));
@ -136,7 +155,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_phraseQuery() { public void testExtractQueryMetadata_phraseQuery() {
PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2"); PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
List<Term> terms = new ArrayList<>(extractQueryTerms(phraseQuery)); Result result = extractQueryTerms(phraseQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1)); assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field())); assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes())); assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
@ -157,7 +178,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD); builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build(); BooleanQuery booleanQuery = builder.build();
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery)); Result result = extractQueryTerms(booleanQuery);
assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms); Collections.sort(terms);
assertThat(terms.size(), equalTo(3)); assertThat(terms.size(), equalTo(3));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field())); assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@ -183,7 +206,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD); builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build(); BooleanQuery booleanQuery = builder.build();
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery)); Result result = extractQueryTerms(booleanQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms); Collections.sort(terms);
assertThat(terms.size(), equalTo(4)); assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field())); assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -204,16 +229,74 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(phraseQuery, BooleanClause.Occur.SHOULD);
BooleanQuery booleanQuery = builder.build();
- List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
Result result = extractQueryTerms(booleanQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
}
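The tests in this file now consume a result pair (a verified flag plus the extracted terms) instead of a bare term set. As a reading aid, here is a minimal sketch of the shape those assertions rely on; the actual Result type lives in ExtractQueryTermsService and this stand-in may differ from it in detail:

// Illustrative sketch only, assuming java.util.Set and
// org.apache.lucene.index.Term are in scope as in the surrounding test file.
final class Result {
    final boolean verified; // true: a candidate match needs no MemoryIndex verification
    final Set<Term> terms;  // terms indexed so the query becomes a candidate match

    Result(boolean verified, Set<Term> terms) {
        this.verified = verified;
        this.terms = terms;
    }
}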
public void testExactMatch_booleanQuery() {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1"));
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
Result result = extractQueryTerms(builder.build());
assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true));

builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2");
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false));

builder = new BooleanQuery.Builder();
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4");
builder.add(phraseQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false));

builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.MUST_NOT);
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false));

builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
result = extractQueryTerms(builder.build());
assertThat("Minimum should match is greater than 1, so candidate matches are not verified", result.verified, is(false));

builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
result = extractQueryTerms(builder.build());
assertThat("Required clause, so candidate matches are not verified", result.verified, is(false));

builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
result = extractQueryTerms(builder.build());
assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false));

builder = new BooleanQuery.Builder();
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
result = extractQueryTerms(builder.build());
assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false));
}
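Taken together, these assertions pin down a single rule for boolean queries: only a pure disjunction whose clauses are all themselves verified stays verified. A hedged sketch of that rule follows; isVerified and clauseVerified are illustrative names, not the commit's API:

// Sketch of the rule the assertions above encode (illustrative only).
static boolean isVerified(BooleanQuery bq, Predicate<Query> clauseVerified) {
    int required = 0;
    for (BooleanClause clause : bq.clauses()) {
        if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            return false; // a prohibited clause can veto any candidate match
        }
        if (clause.isRequired()) {
            required++; // counts MUST and FILTER clauses
        }
    }
    if (required > 0 || bq.getMinimumNumberShouldMatch() > 1) {
        return false; // conjunctions and minimum_should_match > 1 still need verification
    }
    for (BooleanClause clause : bq.clauses()) {
        if (clauseVerified.test(clause.getQuery()) == false) {
            return false; // e.g. a phrase clause taints the whole disjunction
        }
    }
    return true;
}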
public void testExtractQueryMetadata_constantScoreQuery() {
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
- List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
Result result = extractQueryTerms(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -222,7 +305,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
public void testExtractQueryMetadata_boostQuery() {
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f);
- List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
Result result = extractQueryTerms(constantScoreQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
assertThat(terms.size(), equalTo(1));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -232,7 +317,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
commonTermsQuery.add(new Term("_field", "_term1"));
commonTermsQuery.add(new Term("_field", "_term2"));
- List<Term> terms = new ArrayList<>(extractQueryTerms(commonTermsQuery));
Result result = extractQueryTerms(commonTermsQuery);
assertThat(result.verified, is(false));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
@@ -242,15 +329,17 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
}

public void testExtractQueryMetadata_blendedTermQuery() {
- Term[] terms = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
- BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(terms, false);
- List<Term> result = new ArrayList<>(extractQueryTerms(commonTermsQuery));
- Collections.sort(result);
- assertThat(result.size(), equalTo(2));
- assertThat(result.get(0).field(), equalTo("_field"));
- assertThat(result.get(0).text(), equalTo("_term1"));
- assertThat(result.get(1).field(), equalTo("_field"));
- assertThat(result.get(1).text(), equalTo("_term2"));
Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false);
Result result = extractQueryTerms(commonTermsQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(2));
assertThat(terms.get(0).field(), equalTo("_field"));
assertThat(terms.get(0).text(), equalTo("_term1"));
assertThat(terms.get(1).field(), equalTo("_field"));
assertThat(terms.get(1).text(), equalTo("_term2"));
}
public void testExtractQueryMetadata_spanTermQuery() {
@@ -266,8 +355,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
// 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
- Set<Term> terms = extractQueryTerms(spanTermQuery1);
- assertTermsEqual(terms, spanTermQuery1.getTerm());
Result result = extractQueryTerms(spanTermQuery1);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_spanNearQuery() {
@@ -275,48 +365,109 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
.addClause(spanTermQuery1).addClause(spanTermQuery2).build();
- Set<Term> terms = extractQueryTerms(spanNearQuery);
- assertTermsEqual(terms, spanTermQuery2.getTerm());
Result result = extractQueryTerms(spanNearQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery2.getTerm());
}
public void testExtractQueryMetadata_spanOrQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
- Set<Term> terms = extractQueryTerms(spanOrQuery);
- assertTermsEqual(terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
Result result = extractQueryTerms(spanOrQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
}
public void testExtractQueryMetadata_spanFirstQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
- Set<Term> terms = extractQueryTerms(spanFirstQuery);
- assertTermsEqual(terms, spanTermQuery1.getTerm());
Result result = extractQueryTerms(spanFirstQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_spanNotQuery() {
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
- Set<Term> terms = extractQueryTerms(spanNotQuery);
- assertTermsEqual(terms, spanTermQuery1.getTerm());
Result result = extractQueryTerms(spanNotQuery);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
}
public void testExtractQueryMetadata_matchNoDocsQuery() {
- Set<Term> terms = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
- assertEquals(0, terms.size());
Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
assertThat(result.verified, is(true));
assertEquals(0, result.terms.size());

BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
- terms = extractQueryTerms(bq.build());
- assertEquals(0, terms.size());
result = extractQueryTerms(bq.build());
assertThat(result.verified, is(false));
assertEquals(0, result.terms.size());

bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
- terms = extractQueryTerms(bq.build());
- assertTermsEqual(terms, new Term("field", "value"));
result = extractQueryTerms(bq.build());
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));

DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
1f
);
result = extractQueryTerms(disjunctionMaxQuery);
assertThat(result.verified, is(true));
assertTermsEqual(result.terms, new Term("field", "value"));
}
public void testExtractQueryMetadata_matchAllDocsQuery() {
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery()));
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
Result result = extractQueryTerms(builder.build());
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, new Term("field", "value"));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq3 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq3));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq4 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4));
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq5 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5));
}
public void testExtractQueryMetadata_unsupportedQuery() {
@@ -343,8 +494,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
- Set<Term> terms = extractQueryTerms(bq1);
- assertTermsEqual(terms, termQuery1.getTerm());
Result result = extractQueryTerms(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery1.getTerm());

TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
builder = new BooleanQuery.Builder();
@@ -352,8 +504,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
builder.add(termQuery2, BooleanClause.Occur.MUST);
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
bq1 = builder.build();
- terms = extractQueryTerms(bq1);
- assertTermsEqual(terms, termQuery2.getTerm());
result = extractQueryTerms(bq1);
assertThat(result.verified, is(false));
assertTermsEqual(result.terms, termQuery2.getTerm());

builder = new BooleanQuery.Builder();
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
@@ -372,7 +525,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
);
- List<Term> terms = new ArrayList<>(extractQueryTerms(disjunctionMaxQuery));
Result result = extractQueryTerms(disjunctionMaxQuery);
assertThat(result.verified, is(true));
List<Term> terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
assertThat(terms.get(1).field(), equalTo(termQuery2.getTerm().field()));
assertThat(terms.get(1).bytes(), equalTo(termQuery2.getTerm().bytes()));
assertThat(terms.get(2).field(), equalTo(termQuery3.getTerm().field()));
assertThat(terms.get(2).bytes(), equalTo(termQuery3.getTerm().bytes()));
assertThat(terms.get(3).field(), equalTo(termQuery4.getTerm().field()));
assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
disjunctionMaxQuery = new DisjunctionMaxQuery(
Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
);
result = extractQueryTerms(disjunctionMaxQuery);
assertThat(result.verified, is(false));
terms = new ArrayList<>(result.terms);
Collections.sort(terms);
assertThat(terms.size(), equalTo(4));
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -394,7 +567,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
TermsQuery query = (TermsQuery)
- createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD);
createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
PrefixCodedTerms terms = query.getTermData();
assertThat(terms.size(), equalTo(15L));
@@ -413,7 +586,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD);
assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD);
- assertTermIterator(termIterator, "", UNKNOWN_QUERY_FIELD);
assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
}
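The candidate-selection query asserted here amounts to a disjunction over the percolated document's terms plus one marker term that pulls in every percolator query whose term extraction failed (those must always be verified against the MemoryIndex). A minimal sketch under that reading; documentTerms is an assumed variable, and the real implementation builds a TermsQuery from the MemoryIndex reader rather than this loop:

// Illustrative sketch of the candidate query, not the actual implementation.
BooleanQuery.Builder candidates = new BooleanQuery.Builder();
for (Term term : documentTerms) { // terms of the document being percolated (assumed)
    // stored extracted terms encode field and text as "field\0text"
    candidates.add(new TermQuery(new Term(QUERY_TERMS_FIELD, term.field() + "\u0000" + term.text())),
            BooleanClause.Occur.SHOULD);
}
candidates.add(new TermQuery(new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED)),
        BooleanClause.Occur.SHOULD);
Query candidateQuery = candidates.build();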
public void testSelectTermsListWithHighestSumOfTermLength() {
@@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
@@ -37,14 +38,21 @@ import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@@ -52,6 +60,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.Uid;
@@ -61,8 +70,11 @@ import org.junit.After;
import org.junit.Before;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

import static org.hamcrest.Matchers.arrayWithSize;
import static org.hamcrest.Matchers.equalTo;
@@ -72,7 +84,7 @@ public class PercolateQueryTests extends ESTestCase {
public final static String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
public final static String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
- public static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
public final static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
static {
EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false);
@@ -247,34 +259,91 @@ public class PercolateQueryTests extends ESTestCase {
}

public void testDuel() throws Exception {
- int numQueries = scaledRandomIntBetween(32, 256);
- for (int i = 0; i < numQueries; i++) {
- String id = Integer.toString(i);
- Query query;
- if (randomBoolean()) {
- query = new PrefixQuery(new Term("field", id));
- } else if (randomBoolean()) {
- query = new WildcardQuery(new Term("field", id + "*"));
- } else if (randomBoolean()) {
- query = new CustomQuery(new Term("field", id + "*"));
- } else if (randomBoolean()) {
- query = new SpanTermQuery(new Term("field", id));
- } else {
- query = new TermQuery(new Term("field", id));
- }
- addPercolatorQuery(id, query);
- }
List<Function<String, Query>> queries = new ArrayList<>();
queries.add((id) -> new PrefixQuery(new Term("field", id)));
queries.add((id) -> new WildcardQuery(new Term("field", id + "*")));
queries.add((id) -> new CustomQuery(new Term("field", id)));
queries.add((id) -> new SpanTermQuery(new Term("field", id)));
queries.add((id) -> new TermQuery(new Term("field", id)));
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queries.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
return builder.build();
});
queries.add((id) -> new MatchAllDocsQuery());
queries.add((id) -> new MatchNoDocsQuery("no reason at all"));
int numDocs = randomIntBetween(queries.size(), queries.size() * 3);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
addPercolatorQuery(id, queries.get(i % queries.size()).apply(id));
}

indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
- for (int i = 0; i < numQueries; i++) {
- MemoryIndex memoryIndex = new MemoryIndex();
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", id, new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
}

MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
duelRun(memoryIndex, shardSearcher);
// Empty percolator doc:
memoryIndex = new MemoryIndex();
duelRun(memoryIndex, shardSearcher);
}
public void testDuelSpecificQueries() throws Exception {
@@ -312,6 +381,8 @@ public class PercolateQueryTests extends ESTestCase {
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);

MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
@@ -332,33 +403,33 @@ public class PercolateQueryTests extends ESTestCase {
}

private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
- IndexSearcher percolateSearcher = memoryIndex.createSearcher();
- PercolateQuery.Builder builder1 = new PercolateQuery.Builder(
- "docType",
- queryStore,
- new BytesArray("{}"),
- percolateSearcher
- );
- // enables the optimization that prevents queries from being evaluated that don't match
- builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
- TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10);
- PercolateQuery.Builder builder2 = new PercolateQuery.Builder(
- "docType",
- queryStore,
- new BytesArray("{}"),
- percolateSearcher
- );
- builder2.setPercolateTypeQuery(new MatchAllDocsQuery());
- TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10);
- assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits));
- assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length));
- for (int j = 0; j < topDocs1.scoreDocs.length; j++) {
- assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc));
- assertThat(topDocs1.scoreDocs[j].score, equalTo(topDocs2.scoreDocs[j].score));
- Explanation explain1 = shardSearcher.explain(builder1.build(), topDocs1.scoreDocs[j].doc);
- Explanation explain2 = shardSearcher.explain(builder2.build(), topDocs2.scoreDocs[j].doc);
- assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
- }
boolean requireScore = randomBoolean();
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery.Builder builder = new PercolateQuery.Builder(
"docType",
queryStore,
new BytesArray("{}"),
percolateSearcher
);
// enables the optimization that prevents queries from being evaluated that don't match
builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build());
TopDocs topDocs = shardSearcher.search(query, 10);

Query controlQuery = new ControlQuery(memoryIndex, queryStore);
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
for (int j = 0; j < topDocs.scoreDocs.length; j++) {
assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
if (requireScore) {
Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
assertThat(explain1.getValue(), equalTo(explain2.getValue()));
}
}
}
@@ -391,4 +462,89 @@ public class PercolateQueryTests extends ESTestCase {
}
}
private final static class ControlQuery extends Query {
private final MemoryIndex memoryIndex;
private final PercolateQuery.QueryStore queryStore;
private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
this.memoryIndex = memoryIndex;
this.queryStore = queryStore;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
return new ConstantScoreWeight(this) {
float _score;
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
int result = scorer.iterator().advance(doc);
if (result == doc) {
return Explanation.match(scorer.score(), "ControlQuery");
}
}
return Explanation.noMatch("ControlQuery");
}
@Override
public String toString() {
return "weight(" + ControlQuery.this + ")";
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
@Override
protected boolean match(int doc) {
try {
Query query = leaf.getQuery(doc);
float score = memoryIndex.search(query);
if (score != 0f) {
if (needsScores) {
_score = score;
}
return true;
} else {
return false;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
@Override
public float score() throws IOException {
return _score;
}
};
}
};
}
@Override
public String toString(String field) {
return "control{" + field + "}";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return classHash();
}
}
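ControlQuery is the brute-force oracle in the duel: for every document it simply runs each stored percolator query against the in-memory copy of the percolated document, with no candidate filtering or verification shortcut. Reduced to a plain loop, that check looks roughly like the sketch below; the queries map (id to stored query) is assumed for illustration only:

// Illustrative only: MemoryIndex.search returns 0f when the query doesn't match.
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
for (Map.Entry<String, Query> entry : queries.entrySet()) {
    boolean matches = memoryIndex.search(entry.getValue()) != 0f;
    // the optimized PercolateQuery must agree with `matches` for this document
}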
}
@@ -49,6 +49,8 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
@@ -71,7 +73,9 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
indexService = createIndex("test", Settings.EMPTY);
mapperService = indexService.mapperService();
- String mapper = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("field").field("type", "text").endObject()
.startObject("number_field").field("type", "long").endObject()
.startObject("date_field").field("type", "date").endObject()
@@ -96,20 +100,21 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field(fieldName, queryBuilder)
.endObject().bytes());

- assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);

// add a query for which we don't extract terms from
- queryBuilder = matchAllQuery();
queryBuilder = rangeQuery("field").from("a").to("z");
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
.field(fieldName, queryBuilder)
.endObject().bytes());
- assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(1));
- assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName())[0].binaryValue(), equalTo(new BytesRef()));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED));
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
@@ -195,6 +200,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
public void testMultiplePercolatorFields() throws Exception {
String typeName = "another_type";
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("query_field1").field("type", "percolator").endObject()
.startObject("query_field2").field("type", "percolator").endObject()
@@ -209,7 +215,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field("query_field2", queryBuilder)
.endObject().bytes()
);
- assertThat(doc.rootDoc().getFields().size(), equalTo(22)); // also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes _uid (1), type (2), source (1)
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
@@ -221,6 +227,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
public void testNestedPercolatorField() throws Exception {
String typeName = "another_type";
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
.startObject("properties")
.startObject("object_field")
.field("type", "object")
@@ -238,7 +245,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.field("query_field", queryBuilder)
.endObject().endObject().bytes()
);
- assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
@@ -249,7 +256,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
.endArray()
.endObject().bytes()
);
- assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
@@ -397,12 +397,16 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
.addMapping("employee", mapping)
.addMapping("queries", "query", "type=percolator")
);
- client().prepareIndex("test", "queries", "q").setSource(jsonBuilder().startObject()
client().prepareIndex("test", "queries", "q1").setSource(jsonBuilder().startObject()
.field("query", QueryBuilders.nestedQuery("employee",
QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
).endObject())
- .setRefreshPolicy(IMMEDIATE)
.get();
// this query should never match as it doesn't use a nested query:
client().prepareIndex("test", "queries", "q2").setSource(jsonBuilder().startObject()
.field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
.get();
client().admin().indices().prepareRefresh().get();

SearchResponse response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "employee",
@@ -413,9 +417,10 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
.startObject().field("name", "tony stark").endObject()
.endArray()
.endObject().bytes()))
.addSort("_doc", SortOrder.ASC)
.get();
assertHitCount(response, 1);
- assertThat(response.getHits().getAt(0).getId(), equalTo("q"));
assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));

response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "employee",
@@ -426,12 +431,14 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
.startObject().field("name", "tony stark").endObject()
.endArray()
.endObject().bytes()))
.addSort("_doc", SortOrder.ASC)
.get();
assertHitCount(response, 0);

response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", "employee",
XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject().bytes()))
.addSort("_doc", SortOrder.ASC)
.get();
assertHitCount(response, 0);
}