percolator: Don't verify candidate matches with MemoryIndex that are verified matches
If we don't care about scoring, then for some candidate matches we can be certain that a candidate match is always an actual match. Verifying these queries with the MemoryIndex can therefore be skipped.
This commit is contained in:
parent
6d5b4a78fe
commit
599a548998
|
@ -19,6 +19,7 @@
|
|||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
|
@ -27,6 +28,8 @@ import org.apache.lucene.document.TextField;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NoDeletionPolicy;
|
||||
import org.apache.lucene.index.NoMergePolicy;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
@ -35,9 +38,11 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MMapDirectory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -49,9 +54,6 @@ import java.util.Set;
|
|||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class LuceneTests extends ESTestCase {
|
||||
public void testWaitForIndex() throws Exception {
|
||||
final MockDirectoryWrapper dir = newMockDirectory();
|
||||
|
@ -355,6 +357,45 @@ public class LuceneTests extends ESTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testAsSequentialAccessBits() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
|
||||
try (DirectoryReader reader = DirectoryReader.open(w)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false);
|
||||
assertEquals(1, reader.leaves().size());
|
||||
LeafReaderContext leafReaderContext = reader.leaves().get(0);
|
||||
Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext));
|
||||
|
||||
expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1));
|
||||
expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc()));
|
||||
assertTrue(bits.get(0));
|
||||
assertTrue(bits.get(0));
|
||||
assertFalse(bits.get(1));
|
||||
assertFalse(bits.get(1));
|
||||
expectThrows(IllegalArgumentException.class, () -> bits.get(0));
|
||||
assertTrue(bits.get(2));
|
||||
assertTrue(bits.get(2));
|
||||
expectThrows(IllegalArgumentException.class, () -> bits.get(1));
|
||||
}
|
||||
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the "unmap hack" is detected as supported by lucene.
|
||||
* This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038
|
||||
|
|
|
@ -53,10 +53,13 @@ import org.elasticsearch.index.mapper.ParseContext;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* Utility to extract query terms from queries and create queries from documents.
|
||||
|
@ -64,64 +67,179 @@ import java.util.Set;
|
|||
public final class ExtractQueryTermsService {
|
||||
|
||||
private static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
|
||||
public static final String EXTRACTION_COMPLETE = "complete";
|
||||
public static final String EXTRACTION_PARTIAL = "partial";
|
||||
public static final String EXTRACTION_FAILED = "failed";
|
||||
|
||||
static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
|
||||
|
||||
static {
    // Registry of the supported query classes and the processor that extracts terms from each of them.
    Map<Class<? extends Query>, Function<Query, Result>> processors = new HashMap<>(16);

    // wrapper queries that delegate to the query they wrap
    processors.put(ConstantScoreQuery.class, constantScoreQuery());
    processors.put(BoostQuery.class, boostQuery());

    // term based queries
    processors.put(MatchNoDocsQuery.class, matchNoDocsQuery());
    processors.put(TermQuery.class, termQuery());
    processors.put(TermsQuery.class, termsQuery());
    processors.put(CommonTermsQuery.class, commonTermsQuery());
    processors.put(BlendedTermQuery.class, blendedTermQuery());
    processors.put(PhraseQuery.class, phraseQuery());

    // span queries
    processors.put(SpanTermQuery.class, spanTermQuery());
    processors.put(SpanNearQuery.class, spanNearQuery());
    processors.put(SpanOrQuery.class, spanOrQuery());
    processors.put(SpanFirstQuery.class, spanFirstQuery());
    processors.put(SpanNotQuery.class, spanNotQuery());

    // compound queries
    processors.put(BooleanQuery.class, booleanQuery());
    processors.put(DisjunctionMaxQuery.class, disjunctionMaxQuery());

    // the registry is never modified after class initialization
    queryProcessors = Collections.unmodifiableMap(processors);
}
|
||||
|
||||
private ExtractQueryTermsService() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts all terms from the specified query and adds it to the specified document.
|
||||
*
|
||||
* @param query The query to extract terms from
|
||||
* @param document The document to add the extracted terms to
|
||||
* @param queryTermsFieldField The field in the document holding the extracted terms
|
||||
* @param unknownQueryField The field used to mark a document that not all query terms could be extracted.
|
||||
* For example the query contained an unsupported query (e.g. WildcardQuery).
|
||||
* @param fieldType The field type for the query metadata field
|
||||
* @param extractionResultField The field that records whether query term extraction was successful, partial or
|
||||
* failed. (For example the query contained an unsupported query (e.g. WildcardQuery)
|
||||
* then query extraction would fail)
|
||||
* @param fieldType The field type for the query metadata field
|
||||
*/
|
||||
public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField,
|
||||
String unknownQueryField, FieldType fieldType) {
|
||||
Set<Term> queryTerms;
|
||||
String extractionResultField, FieldType fieldType) {
|
||||
Result result;
|
||||
try {
|
||||
queryTerms = extractQueryTerms(query);
|
||||
result = extractQueryTerms(query);
|
||||
} catch (UnsupportedQueryException e) {
|
||||
document.add(new Field(unknownQueryField, new BytesRef(), fieldType));
|
||||
document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType));
|
||||
return;
|
||||
}
|
||||
for (Term term : queryTerms) {
|
||||
for (Term term : result.terms) {
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
builder.append(new BytesRef(term.field()));
|
||||
builder.append(FIELD_VALUE_SEPARATOR);
|
||||
builder.append(term.bytes());
|
||||
document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType));
|
||||
}
|
||||
if (result.verified) {
|
||||
document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType));
|
||||
} else {
|
||||
document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a terms query containing all terms from all fields of the specified index reader.
|
||||
*/
|
||||
public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
|
||||
Term... optionalTerms) throws IOException {
|
||||
Objects.requireNonNull(queryMetadataField);
|
||||
|
||||
List<Term> extractedTerms = new ArrayList<>();
|
||||
Collections.addAll(extractedTerms, optionalTerms);
|
||||
|
||||
Fields fields = MultiFields.getFields(indexReader);
|
||||
for (String field : fields) {
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
BytesRef fieldBr = new BytesRef(field);
|
||||
TermsEnum tenum = terms.iterator();
|
||||
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
builder.append(fieldBr);
|
||||
builder.append(FIELD_VALUE_SEPARATOR);
|
||||
builder.append(term);
|
||||
extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
|
||||
}
|
||||
}
|
||||
return new TermsQuery(extractedTerms);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts all query terms from the provided query and adds it to specified list.
|
||||
*
|
||||
* <p>
|
||||
* From boolean queries with no should clauses and from phrase queries only the longest terms are selected,
|
||||
* since those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored.
|
||||
*
|
||||
* <p>
|
||||
* If from part of the query, no query terms can be extracted then term extraction is stopped and
|
||||
* an UnsupportedQueryException is thrown.
|
||||
*/
|
||||
static Set<Term> extractQueryTerms(Query query) {
|
||||
if (query instanceof MatchNoDocsQuery) {
|
||||
// no terms to extract as this query matches no docs
|
||||
return Collections.emptySet();
|
||||
} else if (query instanceof TermQuery) {
|
||||
return Collections.singleton(((TermQuery) query).getTerm());
|
||||
} else if (query instanceof TermsQuery) {
|
||||
Set<Term> terms = new HashSet<>();
|
||||
static Result extractQueryTerms(Query query) {
|
||||
Class queryClass = query.getClass();
|
||||
if (queryClass.isAnonymousClass()) {
|
||||
// Sometimes queries have anonymous classes in that case we need the direct super class.
|
||||
// (for example blended term query)
|
||||
queryClass = queryClass.getSuperclass();
|
||||
}
|
||||
Function<Query, Result> queryProcessor = queryProcessors.get(queryClass);
|
||||
if (queryProcessor != null) {
|
||||
return queryProcessor.apply(query);
|
||||
} else {
|
||||
throw new UnsupportedQueryException(query);
|
||||
}
|
||||
}
|
||||
|
||||
static Function<Query, Result> matchNoDocsQuery() {
|
||||
return (query -> new Result(true, Collections.emptySet()));
|
||||
}
|
||||
|
||||
static Function<Query, Result> constantScoreQuery() {
|
||||
return query -> {
|
||||
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
|
||||
return extractQueryTerms(wrappedQuery);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> boostQuery() {
|
||||
return query -> {
|
||||
Query wrappedQuery = ((BoostQuery) query).getQuery();
|
||||
return extractQueryTerms(wrappedQuery);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> termQuery() {
|
||||
return (query -> {
|
||||
TermQuery termQuery = (TermQuery) query;
|
||||
return new Result(true, Collections.singleton(termQuery.getTerm()));
|
||||
});
|
||||
}
|
||||
|
||||
static Function<Query, Result> termsQuery() {
|
||||
return query -> {
|
||||
TermsQuery termsQuery = (TermsQuery) query;
|
||||
Set<Term> terms = new HashSet<>();
|
||||
PrefixCodedTerms.TermIterator iterator = termsQuery.getTermData().iterator();
|
||||
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
|
||||
terms.add(new Term(iterator.field(), term));
|
||||
}
|
||||
return terms;
|
||||
} else if (query instanceof PhraseQuery) {
|
||||
return new Result(true, terms);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> commonTermsQuery() {
|
||||
return query -> {
|
||||
List<Term> terms = ((CommonTermsQuery) query).getTerms();
|
||||
return new Result(false, new HashSet<>(terms));
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> blendedTermQuery() {
|
||||
return query -> {
|
||||
List<Term> terms = ((BlendedTermQuery) query).getTerms();
|
||||
return new Result(true, new HashSet<>(terms));
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> phraseQuery() {
|
||||
return query -> {
|
||||
Term[] terms = ((PhraseQuery) query).getTerms();
|
||||
if (terms.length == 0) {
|
||||
return Collections.emptySet();
|
||||
return new Result(true, Collections.emptySet());
|
||||
}
|
||||
|
||||
// the longest term is likely to be the rarest,
|
||||
|
@ -132,19 +250,76 @@ public final class ExtractQueryTermsService {
|
|||
longestTerm = term;
|
||||
}
|
||||
}
|
||||
return Collections.singleton(longestTerm);
|
||||
} else if (query instanceof BooleanQuery) {
|
||||
List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
|
||||
boolean hasRequiredClauses = false;
|
||||
return new Result(false, Collections.singleton(longestTerm));
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> spanTermQuery() {
|
||||
return query -> {
|
||||
Term term = ((SpanTermQuery) query).getTerm();
|
||||
return new Result(true, Collections.singleton(term));
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> spanNearQuery() {
|
||||
return query -> {
|
||||
Set<Term> bestClauses = null;
|
||||
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
|
||||
for (SpanQuery clause : spanNearQuery.getClauses()) {
|
||||
Result temp = extractQueryTerms(clause);
|
||||
bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
|
||||
}
|
||||
return new Result(false, bestClauses);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> spanOrQuery() {
|
||||
return query -> {
|
||||
Set<Term> terms = new HashSet<>();
|
||||
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
|
||||
for (SpanQuery clause : spanOrQuery.getClauses()) {
|
||||
terms.addAll(extractQueryTerms(clause).terms);
|
||||
}
|
||||
return new Result(false, terms);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> spanNotQuery() {
|
||||
return query -> {
|
||||
Result result = extractQueryTerms(((SpanNotQuery) query).getInclude());
|
||||
return new Result(false, result.terms);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> spanFirstQuery() {
|
||||
return query -> {
|
||||
Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch());
|
||||
return new Result(false, result.terms);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> booleanQuery() {
|
||||
return query -> {
|
||||
BooleanQuery bq = (BooleanQuery) query;
|
||||
List<BooleanClause> clauses = bq.clauses();
|
||||
int minimumShouldMatch = bq.getMinimumNumberShouldMatch();
|
||||
int numRequiredClauses = 0;
|
||||
int numOptionalClauses = 0;
|
||||
int numProhibitedClauses = 0;
|
||||
for (BooleanClause clause : clauses) {
|
||||
if (clause.isRequired()) {
|
||||
hasRequiredClauses = true;
|
||||
break;
|
||||
numRequiredClauses++;
|
||||
}
|
||||
if (clause.isProhibited()) {
|
||||
numProhibitedClauses++;
|
||||
}
|
||||
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
|
||||
numOptionalClauses++;
|
||||
}
|
||||
}
|
||||
if (hasRequiredClauses) {
|
||||
UnsupportedQueryException uqe = null;
|
||||
if (numRequiredClauses > 0) {
|
||||
Set<Term> bestClause = null;
|
||||
UnsupportedQueryException uqe = null;
|
||||
for (BooleanClause clause : clauses) {
|
||||
if (clause.isRequired() == false) {
|
||||
// skip must_not clauses, we don't need to remember the things that do *not* match...
|
||||
|
@ -153,77 +328,56 @@ public final class ExtractQueryTermsService {
|
|||
continue;
|
||||
}
|
||||
|
||||
Set<Term> temp;
|
||||
Result temp;
|
||||
try {
|
||||
temp = extractQueryTerms(clause.getQuery());
|
||||
} catch (UnsupportedQueryException e) {
|
||||
uqe = e;
|
||||
continue;
|
||||
}
|
||||
bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
|
||||
bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause);
|
||||
}
|
||||
if (bestClause != null) {
|
||||
return bestClause;
|
||||
return new Result(false, bestClause);
|
||||
} else {
|
||||
if (uqe != null) {
|
||||
// we're unable to select the best clause and an exception occurred, so we bail
|
||||
throw uqe;
|
||||
} else {
|
||||
// We didn't find a clause and no exception occurred, so this bq only contained MatchNoDocsQueries,
|
||||
return new Result(true, Collections.emptySet());
|
||||
}
|
||||
return Collections.emptySet();
|
||||
}
|
||||
} else {
|
||||
Set<Term> terms = new HashSet<>();
|
||||
List<Query> disjunctions = new ArrayList<>(numOptionalClauses);
|
||||
for (BooleanClause clause : clauses) {
|
||||
if (clause.isProhibited()) {
|
||||
// we don't need to remember the things that do *not* match...
|
||||
continue;
|
||||
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
|
||||
disjunctions.add(clause.getQuery());
|
||||
}
|
||||
terms.addAll(extractQueryTerms(clause.getQuery()));
|
||||
}
|
||||
return terms;
|
||||
return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0);
|
||||
}
|
||||
} else if (query instanceof ConstantScoreQuery) {
|
||||
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
|
||||
return extractQueryTerms(wrappedQuery);
|
||||
} else if (query instanceof BoostQuery) {
|
||||
Query wrappedQuery = ((BoostQuery) query).getQuery();
|
||||
return extractQueryTerms(wrappedQuery);
|
||||
} else if (query instanceof CommonTermsQuery) {
|
||||
List<Term> terms = ((CommonTermsQuery) query).getTerms();
|
||||
return new HashSet<>(terms);
|
||||
} else if (query instanceof BlendedTermQuery) {
|
||||
List<Term> terms = ((BlendedTermQuery) query).getTerms();
|
||||
return new HashSet<>(terms);
|
||||
} else if (query instanceof DisjunctionMaxQuery) {
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> disjunctionMaxQuery() {
|
||||
return query -> {
|
||||
List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
|
||||
Set<Term> terms = new HashSet<>();
|
||||
for (Query disjunct : disjuncts) {
|
||||
terms.addAll(extractQueryTerms(disjunct));
|
||||
return handleDisjunction(disjuncts, 1, false);
|
||||
};
|
||||
}
|
||||
|
||||
static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
|
||||
boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
|
||||
Set<Term> terms = new HashSet<>();
|
||||
for (Query disjunct : disjunctions) {
|
||||
Result subResult = extractQueryTerms(disjunct);
|
||||
if (subResult.verified == false) {
|
||||
verified = false;
|
||||
}
|
||||
return terms;
|
||||
} else if (query instanceof SpanTermQuery) {
|
||||
return Collections.singleton(((SpanTermQuery) query).getTerm());
|
||||
} else if (query instanceof SpanNearQuery) {
|
||||
Set<Term> bestClause = null;
|
||||
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
|
||||
for (SpanQuery clause : spanNearQuery.getClauses()) {
|
||||
Set<Term> temp = extractQueryTerms(clause);
|
||||
bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
|
||||
}
|
||||
return bestClause;
|
||||
} else if (query instanceof SpanOrQuery) {
|
||||
Set<Term> terms = new HashSet<>();
|
||||
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
|
||||
for (SpanQuery clause : spanOrQuery.getClauses()) {
|
||||
terms.addAll(extractQueryTerms(clause));
|
||||
}
|
||||
return terms;
|
||||
} else if (query instanceof SpanFirstQuery) {
|
||||
return extractQueryTerms(((SpanFirstQuery)query).getMatch());
|
||||
} else if (query instanceof SpanNotQuery) {
|
||||
return extractQueryTerms(((SpanNotQuery) query).getInclude());
|
||||
} else {
|
||||
throw new UnsupportedQueryException(query);
|
||||
terms.addAll(subResult.terms);
|
||||
}
|
||||
return new Result(verified, terms);
|
||||
}
|
||||
|
||||
static Set<Term> selectTermListWithTheLongestShortestTerm(Set<Term> terms1, Set<Term> terms2) {
|
||||
|
@ -243,7 +397,7 @@ public final class ExtractQueryTermsService {
|
|||
}
|
||||
}
|
||||
|
||||
private static int minTermLength(Set<Term> terms) {
|
||||
static int minTermLength(Set<Term> terms) {
|
||||
int min = Integer.MAX_VALUE;
|
||||
for (Term term : terms) {
|
||||
min = Math.min(min, term.bytes().length);
|
||||
|
@ -251,40 +405,22 @@ public final class ExtractQueryTermsService {
|
|||
return min;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a boolean query with a should clause for each term on all fields of the specified index reader.
|
||||
*/
|
||||
public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
|
||||
String unknownQueryField) throws IOException {
|
||||
Objects.requireNonNull(queryMetadataField);
|
||||
Objects.requireNonNull(unknownQueryField);
|
||||
static class Result {
|
||||
|
||||
List<Term> extractedTerms = new ArrayList<>();
|
||||
extractedTerms.add(new Term(unknownQueryField));
|
||||
Fields fields = MultiFields.getFields(indexReader);
|
||||
for (String field : fields) {
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
}
|
||||
final Set<Term> terms;
|
||||
final boolean verified;
|
||||
|
||||
BytesRef fieldBr = new BytesRef(field);
|
||||
TermsEnum tenum = terms.iterator();
|
||||
for (BytesRef term = tenum.next(); term != null ; term = tenum.next()) {
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
builder.append(fieldBr);
|
||||
builder.append(FIELD_VALUE_SEPARATOR);
|
||||
builder.append(term);
|
||||
extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
|
||||
}
|
||||
Result(boolean verified, Set<Term> terms) {
|
||||
this.terms = terms;
|
||||
this.verified = verified;
|
||||
}
|
||||
return new TermsQuery(extractedTerms);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Exception indicating that none or some query terms couldn't be extracted from a percolator query.
|
||||
*/
|
||||
public static class UnsupportedQueryException extends RuntimeException {
|
||||
static class UnsupportedQueryException extends RuntimeException {
|
||||
|
||||
private final Query unsupportedQuery;
|
||||
|
||||
|
|
|
@ -28,12 +28,15 @@ import org.apache.lucene.search.Explanation;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
@ -54,6 +57,7 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
private final IndexSearcher percolatorIndexSearcher;
|
||||
|
||||
private Query queriesMetaDataQuery;
|
||||
private Query verifiedQueriesQuery = new MatchNoDocsQuery("");
|
||||
private Query percolateTypeQuery;
|
||||
|
||||
/**
|
||||
|
@ -64,21 +68,32 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
*/
|
||||
public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) {
|
||||
this.docType = Objects.requireNonNull(docType);
|
||||
this.queryStore = Objects.requireNonNull(queryStore);
|
||||
this.documentSource = Objects.requireNonNull(documentSource);
|
||||
this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
|
||||
this.queryStore = Objects.requireNonNull(queryStore);
|
||||
}
|
||||
|
||||
/**
|
||||
* Optionally sets a query that reduces the number of queries to percolate based on extracted terms from
|
||||
* the document to be percolated.
|
||||
*
|
||||
* @param extractedTermsFieldName The name of the field to get the extracted terms from
|
||||
* @param unknownQueryFieldname The field used to mark documents whose queries couldn't all get extracted
|
||||
* @param extractedTermsFieldName The name of the field to get the extracted terms from
|
||||
* @param extractionResultField The field to indicate for a document whether query term extraction was complete,
|
||||
* partial or failed. If query extraction was complete, the MemoryIndex verification can be skipped.
|
||||
*/
|
||||
public void extractQueryTermsQuery(String extractedTermsFieldName, String unknownQueryFieldname) throws IOException {
|
||||
public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException {
|
||||
// We can only skip the MemoryIndex verification when percolating a single document.
|
||||
// When the document being percolated contains a nested object field then the MemoryIndex contains multiple
|
||||
// documents. In this case the term query that indicates whether memory index verification can be skipped
|
||||
// can incorrectly indicate that non nested queries would match, while their nested variants would not.
|
||||
if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) {
|
||||
this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE));
|
||||
}
|
||||
this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery(
|
||||
percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName, unknownQueryFieldname
|
||||
percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName,
|
||||
// include extractionResultField:failed, because docs with this term have no extractedTermsField
|
||||
// and otherwise we would fail to return these docs. Docs that failed query term extraction
|
||||
// always need to be verified by MemoryIndex:
|
||||
new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -94,14 +109,15 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata");
|
||||
}
|
||||
// The query that selects which percolator queries will be evaluated by MemoryIndex:
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder();
|
||||
if (percolateTypeQuery != null) {
|
||||
builder.add(percolateTypeQuery, FILTER);
|
||||
queriesQuery.add(percolateTypeQuery, FILTER);
|
||||
}
|
||||
if (queriesMetaDataQuery != null) {
|
||||
builder.add(queriesMetaDataQuery, FILTER);
|
||||
queriesQuery.add(queriesMetaDataQuery, FILTER);
|
||||
}
|
||||
return new PercolateQuery(docType, queryStore, documentSource, builder.build(), percolatorIndexSearcher);
|
||||
return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher,
|
||||
verifiedQueriesQuery);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -110,22 +126,25 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
private final QueryStore queryStore;
|
||||
private final BytesReference documentSource;
|
||||
private final Query percolatorQueriesQuery;
|
||||
private final Query verifiedQueriesQuery;
|
||||
private final IndexSearcher percolatorIndexSearcher;
|
||||
|
||||
private PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
|
||||
Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher) {
|
||||
Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) {
|
||||
this.documentType = documentType;
|
||||
this.documentSource = documentSource;
|
||||
this.percolatorQueriesQuery = percolatorQueriesQuery;
|
||||
this.queryStore = queryStore;
|
||||
this.percolatorIndexSearcher = percolatorIndexSearcher;
|
||||
this.verifiedQueriesQuery = verifiedQueriesQuery;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
Query rewritten = percolatorQueriesQuery.rewrite(reader);
|
||||
if (rewritten != percolatorQueriesQuery) {
|
||||
return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher);
|
||||
return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher,
|
||||
verifiedQueriesQuery);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
|
@ -133,6 +152,7 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false);
|
||||
final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores);
|
||||
return new Weight(this) {
|
||||
@Override
|
||||
|
@ -206,6 +226,8 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
}
|
||||
};
|
||||
} else {
|
||||
Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext);
|
||||
Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
|
||||
return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {
|
||||
|
||||
@Override
|
||||
|
@ -214,6 +236,14 @@ public final class PercolateQuery extends Query implements Accountable {
|
|||
}
|
||||
|
||||
boolean matchDocId(int docId) throws IOException {
|
||||
// We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
|
||||
// If docId also appears in the verifiedDocsBits then that means during indexing
|
||||
// we were able to extract all query terms and for this candidate match
|
||||
// and we determined based on the nature of the query that it is safe to skip
|
||||
// the MemoryIndex verification.
|
||||
if (verifiedDocsBits.get(docId)) {
|
||||
return true;
|
||||
}
|
||||
Query query = percolatorQueries.getQuery(docId);
|
||||
return query != null && Lucene.exists(percolatorIndexSearcher, query);
|
||||
}
|
||||
|
|
|
@ -420,10 +420,11 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
|
|||
"] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
|
||||
}
|
||||
PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
|
||||
PercolateQuery.QueryStore queryStore = createStore(pft, context, mapUnmappedFieldsAsString);
|
||||
PercolateQuery.Builder builder = new PercolateQuery.Builder(
|
||||
documentType, createStore(pft, context, mapUnmappedFieldsAsString), document, docSearcher
|
||||
documentType, queryStore, document, docSearcher
|
||||
);
|
||||
builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
|
||||
builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getExtractionResultFieldName());
|
||||
return builder.build();
|
||||
} else {
|
||||
Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, MapperService.PERCOLATOR_LEGACY_TYPE_NAME));
|
||||
|
|
|
@ -58,7 +58,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType();
|
||||
|
||||
public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
|
||||
public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
|
||||
public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
|
||||
public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
|
||||
|
||||
public static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
|
||||
|
@ -75,15 +75,15 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
context.path().add(name());
|
||||
KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
|
||||
((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType();
|
||||
KeywordFieldMapper unknownQueryField = createExtractQueryFieldBuilder(UNKNOWN_QUERY_FIELD_NAME, context);
|
||||
((PercolatorFieldType) fieldType).unknownQueryField = unknownQueryField.fieldType();
|
||||
KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
|
||||
((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType();
|
||||
BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
|
||||
((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType();
|
||||
context.path().remove();
|
||||
setupFieldType(context);
|
||||
return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
|
||||
multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
|
||||
unknownQueryField, queryBuilderField);
|
||||
extractionResultField, queryBuilderField);
|
||||
}
|
||||
|
||||
static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
|
||||
|
@ -102,6 +102,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
builder.fieldType().setDocValuesType(DocValuesType.BINARY);
|
||||
return builder.build(context);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class TypeParser implements FieldMapper.TypeParser {
|
||||
|
@ -115,7 +116,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
public static class PercolatorFieldType extends MappedFieldType {
|
||||
|
||||
private MappedFieldType queryTermsField;
|
||||
private MappedFieldType unknownQueryField;
|
||||
private MappedFieldType extractionResultField;
|
||||
private MappedFieldType queryBuilderField;
|
||||
|
||||
public PercolatorFieldType() {
|
||||
|
@ -127,7 +128,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
public PercolatorFieldType(PercolatorFieldType ref) {
|
||||
super(ref);
|
||||
queryTermsField = ref.queryTermsField;
|
||||
unknownQueryField = ref.unknownQueryField;
|
||||
extractionResultField = ref.extractionResultField;
|
||||
queryBuilderField = ref.queryBuilderField;
|
||||
}
|
||||
|
||||
|
@ -135,8 +136,8 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
return queryTermsField.name();
|
||||
}
|
||||
|
||||
public String getUnknownQueryFieldName() {
|
||||
return unknownQueryField.name();
|
||||
public String getExtractionResultFieldName() {
|
||||
return extractionResultField.name();
|
||||
}
|
||||
|
||||
public String getQueryBuilderFieldName() {
|
||||
|
@ -162,17 +163,17 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
private final boolean mapUnmappedFieldAsString;
|
||||
private final QueryShardContext queryShardContext;
|
||||
private KeywordFieldMapper queryTermsField;
|
||||
private KeywordFieldMapper unknownQueryField;
|
||||
private KeywordFieldMapper extractionResultField;
|
||||
private BinaryFieldMapper queryBuilderField;
|
||||
|
||||
public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
||||
Settings indexSettings, MultiFields multiFields, CopyTo copyTo, QueryShardContext queryShardContext,
|
||||
KeywordFieldMapper queryTermsField, KeywordFieldMapper unknownQueryField,
|
||||
KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
|
||||
BinaryFieldMapper queryBuilderField) {
|
||||
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
|
||||
this.queryShardContext = queryShardContext;
|
||||
this.queryTermsField = queryTermsField;
|
||||
this.unknownQueryField = unknownQueryField;
|
||||
this.extractionResultField = extractionResultField;
|
||||
this.queryBuilderField = queryBuilderField;
|
||||
this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
|
||||
}
|
||||
|
@ -181,18 +182,18 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
|
||||
PercolatorFieldMapper updated = (PercolatorFieldMapper) super.updateFieldType(fullNameToFieldType);
|
||||
KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
|
||||
KeywordFieldMapper unknownQueryUpdated = (KeywordFieldMapper) unknownQueryField.updateFieldType(fullNameToFieldType);
|
||||
KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
|
||||
BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
|
||||
|
||||
if (updated == this || queryTermsUpdated == queryTermsField || unknownQueryUpdated == unknownQueryField
|
||||
|| queryBuilderUpdated == queryBuilderField) {
|
||||
if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
|
||||
&& queryBuilderUpdated == queryBuilderField) {
|
||||
return this;
|
||||
}
|
||||
if (updated == this) {
|
||||
updated = (PercolatorFieldMapper) updated.clone();
|
||||
}
|
||||
updated.queryTermsField = queryTermsUpdated;
|
||||
updated.unknownQueryField = unknownQueryUpdated;
|
||||
updated.extractionResultField = extractionResultUpdated;
|
||||
updated.queryBuilderField = queryBuilderUpdated;
|
||||
return updated;
|
||||
}
|
||||
|
@ -220,7 +221,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
}
|
||||
|
||||
Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder);
|
||||
ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), unknownQueryField.name(),
|
||||
ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(),
|
||||
queryTermsField.fieldType());
|
||||
return null;
|
||||
}
|
||||
|
@ -258,7 +259,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
|
||||
@Override
|
||||
public Iterator<Mapper> iterator() {
|
||||
return Arrays.<Mapper>asList(queryTermsField, unknownQueryField, queryBuilderField).iterator();
|
||||
return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
|||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.PrefixCodedTerms;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.memory.MemoryIndex;
|
||||
|
@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
|
@ -44,6 +46,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
|
||||
import org.elasticsearch.index.mapper.ParseContext;
|
||||
import org.elasticsearch.percolator.ExtractQueryTermsService.Result;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
|
||||
|
@ -54,18 +57,22 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.Matchers.sameInstance;
|
||||
|
||||
public class ExtractQueryTermsServiceTests extends ESTestCase {
|
||||
|
||||
public final static String QUERY_TERMS_FIELD = "extracted_terms";
|
||||
public final static String UNKNOWN_QUERY_FIELD = "unknown_query";
|
||||
public static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
|
||||
public final static String EXTRACTION_RESULT_FIELD = "extraction_result";
|
||||
public final static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
|
||||
|
||||
static {
|
||||
QUERY_TERMS_FIELD_TYPE.setTokenized(false);
|
||||
|
@ -81,33 +88,41 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
|
||||
ParseContext.Document document = new ParseContext.Document();
|
||||
extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
|
||||
Collections.sort(document.getFields(), (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
|
||||
assertThat(document.getFields().size(), equalTo(2));
|
||||
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
|
||||
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
|
||||
assertThat(document.getFields().get(1).name(), equalTo(QUERY_TERMS_FIELD));
|
||||
assertThat(document.getFields().get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
|
||||
extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
|
||||
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE));
|
||||
List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD)));
|
||||
Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
|
||||
assertThat(fields.size(), equalTo(2));
|
||||
assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD));
|
||||
assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
|
||||
assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD));
|
||||
assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_unsupported() {
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
TermQuery termQuery1 = new TermQuery(new Term("field1", "term1"));
|
||||
bq.add(termQuery1, BooleanClause.Occur.SHOULD);
|
||||
TermQuery termQuery2 = new TermQuery(new Term("field2", "term2"));
|
||||
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
|
||||
TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
|
||||
ParseContext.Document document = new ParseContext.Document();
|
||||
extractQueryTerms(query, document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
|
||||
extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
|
||||
assertThat(document.getFields().size(), equalTo(1));
|
||||
assertThat(document.getFields().get(0).name(), equalTo(UNKNOWN_QUERY_FIELD));
|
||||
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo(""));
|
||||
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_notVerified() {
|
||||
PhraseQuery phraseQuery = new PhraseQuery("field", "term");
|
||||
|
||||
ParseContext.Document document = new ParseContext.Document();
|
||||
extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
|
||||
assertThat(document.getFields().size(), equalTo(2));
|
||||
assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
|
||||
assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
|
||||
assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_termQuery() {
|
||||
TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(termQuery));
|
||||
Result result = extractQueryTerms(termQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
assertThat(terms.size(), equalTo(1));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field()));
|
||||
assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
|
||||
|
@ -115,7 +130,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
|
||||
public void testExtractQueryMetadata_termsQuery() {
|
||||
TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(termsQuery));
|
||||
Result result = extractQueryTerms(termsQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(2));
|
||||
assertThat(terms.get(0).field(), equalTo("_field"));
|
||||
|
@ -125,7 +142,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
|
||||
// test with different fields
|
||||
termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
|
||||
terms = new ArrayList<>(extractQueryTerms(termsQuery));
|
||||
result = extractQueryTerms(termsQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(2));
|
||||
assertThat(terms.get(0).field(), equalTo("_field1"));
|
||||
|
@ -136,7 +155,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
|
||||
public void testExtractQueryMetadata_phraseQuery() {
|
||||
PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(phraseQuery));
|
||||
Result result = extractQueryTerms(phraseQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
assertThat(terms.size(), equalTo(1));
|
||||
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
|
||||
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
|
||||
|
@ -157,7 +178,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
|
||||
|
||||
BooleanQuery booleanQuery = builder.build();
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
|
||||
Result result = extractQueryTerms(booleanQuery);
|
||||
assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(3));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
|
||||
|
@ -183,7 +206,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
|
||||
|
||||
BooleanQuery booleanQuery = builder.build();
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
|
||||
Result result = extractQueryTerms(booleanQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(4));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
|
||||
|
@ -204,16 +229,74 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
builder.add(phraseQuery, BooleanClause.Occur.SHOULD);
|
||||
|
||||
BooleanQuery booleanQuery = builder.build();
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
|
||||
Result result = extractQueryTerms(booleanQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
assertThat(terms.size(), equalTo(1));
|
||||
assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
|
||||
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
|
||||
}
|
||||
|
||||
public void testExactMatch_booleanQuery() {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1"));
|
||||
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
|
||||
TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
|
||||
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
Result result = extractQueryTerms(builder.build());
|
||||
assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
|
||||
PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2");
|
||||
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
|
||||
PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4");
|
||||
builder.add(phraseQuery2, BooleanClause.Occur.SHOULD);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(termQuery1, BooleanClause.Occur.MUST_NOT);
|
||||
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
|
||||
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
|
||||
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("Minimum match is > 1, so candidate matches are not verified", result.verified, is(false));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("Single required clause, so candidate matches are not verified", result.verified, is(false));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
|
||||
builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
|
||||
builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
|
||||
result = extractQueryTerms(builder.build());
|
||||
assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_constantScoreQuery() {
|
||||
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
|
||||
ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
|
||||
Result result = extractQueryTerms(constantScoreQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
assertThat(terms.size(), equalTo(1));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
|
||||
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
|
||||
|
@ -222,7 +305,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
public void testExtractQueryMetadata_boostQuery() {
|
||||
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
|
||||
BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f);
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
|
||||
Result result = extractQueryTerms(constantScoreQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
assertThat(terms.size(), equalTo(1));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
|
||||
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
|
||||
|
@ -232,7 +317,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
|
||||
commonTermsQuery.add(new Term("_field", "_term1"));
|
||||
commonTermsQuery.add(new Term("_field", "_term2"));
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(commonTermsQuery));
|
||||
Result result = extractQueryTerms(commonTermsQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(2));
|
||||
assertThat(terms.get(0).field(), equalTo("_field"));
|
||||
|
@ -242,15 +329,17 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testExtractQueryMetadata_blendedTermQuery() {
|
||||
Term[] terms = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
|
||||
BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(terms, false);
|
||||
List<Term> result = new ArrayList<>(extractQueryTerms(commonTermsQuery));
|
||||
Collections.sort(result);
|
||||
assertThat(result.size(), equalTo(2));
|
||||
assertThat(result.get(0).field(), equalTo("_field"));
|
||||
assertThat(result.get(0).text(), equalTo("_term1"));
|
||||
assertThat(result.get(1).field(), equalTo("_field"));
|
||||
assertThat(result.get(1).text(), equalTo("_term2"));
|
||||
Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
|
||||
BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false);
|
||||
Result result = extractQueryTerms(commonTermsQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(2));
|
||||
assertThat(terms.get(0).field(), equalTo("_field"));
|
||||
assertThat(terms.get(0).text(), equalTo("_term1"));
|
||||
assertThat(terms.get(1).field(), equalTo("_field"));
|
||||
assertThat(terms.get(1).text(), equalTo("_term2"));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_spanTermQuery() {
|
||||
|
@ -266,8 +355,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
// 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
|
||||
|
||||
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||
Set<Term> terms = extractQueryTerms(spanTermQuery1);
|
||||
assertTermsEqual(terms, spanTermQuery1.getTerm());
|
||||
Result result = extractQueryTerms(spanTermQuery1);
|
||||
assertThat(result.verified, is(true));
|
||||
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_spanNearQuery() {
|
||||
|
@ -275,48 +365,109 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
|
||||
.addClause(spanTermQuery1).addClause(spanTermQuery2).build();
|
||||
Set<Term> terms = extractQueryTerms(spanNearQuery);
|
||||
assertTermsEqual(terms, spanTermQuery2.getTerm());
|
||||
|
||||
Result result = extractQueryTerms(spanNearQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, spanTermQuery2.getTerm());
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_spanOrQuery() {
|
||||
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||
SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
|
||||
Set<Term> terms = extractQueryTerms(spanOrQuery);
|
||||
assertTermsEqual(terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
|
||||
Result result = extractQueryTerms(spanOrQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_spanFirstQuery() {
|
||||
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||
SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
|
||||
Set<Term> terms = extractQueryTerms(spanFirstQuery);
|
||||
assertTermsEqual(terms, spanTermQuery1.getTerm());
|
||||
Result result = extractQueryTerms(spanFirstQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_spanNotQuery() {
|
||||
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
|
||||
Set<Term> terms = extractQueryTerms(spanNotQuery);
|
||||
assertTermsEqual(terms, spanTermQuery1.getTerm());
|
||||
Result result = extractQueryTerms(spanNotQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, spanTermQuery1.getTerm());
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_matchNoDocsQuery() {
|
||||
Set<Term> terms = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
|
||||
assertEquals(0, terms.size());
|
||||
Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
|
||||
assertThat(result.verified, is(true));
|
||||
assertEquals(0, result.terms.size());
|
||||
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
|
||||
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
|
||||
terms = extractQueryTerms(bq.build());
|
||||
assertEquals(0, terms.size());
|
||||
result = extractQueryTerms(bq.build());
|
||||
assertThat(result.verified, is(false));
|
||||
assertEquals(0, result.terms.size());
|
||||
|
||||
bq = new BooleanQuery.Builder();
|
||||
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
|
||||
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
|
||||
terms = extractQueryTerms(bq.build());
|
||||
assertTermsEqual(terms, new Term("field", "value"));
|
||||
result = extractQueryTerms(bq.build());
|
||||
assertThat(result.verified, is(true));
|
||||
assertTermsEqual(result.terms, new Term("field", "value"));
|
||||
|
||||
DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
|
||||
Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
|
||||
1f
|
||||
);
|
||||
result = extractQueryTerms(disjunctionMaxQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
assertTermsEqual(result.terms, new Term("field", "value"));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_matchAllDocsQuery() {
|
||||
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery()));
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
Result result = extractQueryTerms(builder.build());
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, new Term("field", "value"));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
BooleanQuery bq1 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
BooleanQuery bq2 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery bq3 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq3));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery bq4 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery bq5 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_unsupportedQuery() {
|
||||
|
@ -343,8 +494,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
|
||||
BooleanQuery bq1 = builder.build();
|
||||
|
||||
Set<Term> terms = extractQueryTerms(bq1);
|
||||
assertTermsEqual(terms, termQuery1.getTerm());
|
||||
Result result = extractQueryTerms(bq1);
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, termQuery1.getTerm());
|
||||
|
||||
TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
|
||||
builder = new BooleanQuery.Builder();
|
||||
|
@ -352,8 +504,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
builder.add(termQuery2, BooleanClause.Occur.MUST);
|
||||
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
|
||||
bq1 = builder.build();
|
||||
terms = extractQueryTerms(bq1);
|
||||
assertTermsEqual(terms, termQuery2.getTerm());
|
||||
result = extractQueryTerms(bq1);
|
||||
assertThat(result.verified, is(false));
|
||||
assertTermsEqual(result.terms, termQuery2.getTerm());
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
|
||||
|
@ -372,7 +525,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
|
||||
);
|
||||
|
||||
List<Term> terms = new ArrayList<>(extractQueryTerms(disjunctionMaxQuery));
|
||||
Result result = extractQueryTerms(disjunctionMaxQuery);
|
||||
assertThat(result.verified, is(true));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(4));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
|
||||
assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
|
||||
assertThat(terms.get(1).field(), equalTo(termQuery2.getTerm().field()));
|
||||
assertThat(terms.get(1).bytes(), equalTo(termQuery2.getTerm().bytes()));
|
||||
assertThat(terms.get(2).field(), equalTo(termQuery3.getTerm().field()));
|
||||
assertThat(terms.get(2).bytes(), equalTo(termQuery3.getTerm().bytes()));
|
||||
assertThat(terms.get(3).field(), equalTo(termQuery4.getTerm().field()));
|
||||
assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
|
||||
|
||||
disjunctionMaxQuery = new DisjunctionMaxQuery(
|
||||
Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
|
||||
);
|
||||
|
||||
result = extractQueryTerms(disjunctionMaxQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
terms = new ArrayList<>(result.terms);
|
||||
Collections.sort(terms);
|
||||
assertThat(terms.size(), equalTo(4));
|
||||
assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
|
||||
|
@ -394,7 +567,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
|
||||
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
|
||||
TermsQuery query = (TermsQuery)
|
||||
createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD);
|
||||
createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
|
||||
|
||||
PrefixCodedTerms terms = query.getTermData();
|
||||
assertThat(terms.size(), equalTo(15L));
|
||||
|
@ -413,7 +586,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
|||
assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD);
|
||||
assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD);
|
||||
assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD);
|
||||
assertTermIterator(termIterator, "", UNKNOWN_QUERY_FIELD);
|
||||
assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
|
||||
}
|
||||
|
||||
public void testSelectTermsListWithHighestSumOfTermLength() {
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NoMergePolicy;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.memory.MemoryIndex;
|
||||
|
@ -37,14 +38,21 @@ import org.apache.lucene.queries.CommonTermsQuery;
|
|||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.FilterScorer;
|
||||
import org.apache.lucene.search.FilteredDocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||
|
@ -52,6 +60,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
|
|||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.mapper.ParseContext;
|
||||
import org.elasticsearch.index.mapper.Uid;
|
||||
|
@ -61,8 +70,11 @@ import org.junit.After;
|
|||
import org.junit.Before;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
|
||||
import static org.hamcrest.Matchers.arrayWithSize;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
@ -72,7 +84,7 @@ public class PercolateQueryTests extends ESTestCase {
|
|||
|
||||
public final static String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
|
||||
public final static String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
|
||||
public static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
|
||||
public final static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
|
||||
|
||||
static {
|
||||
EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false);
|
||||
|
@ -247,34 +259,91 @@ public class PercolateQueryTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testDuel() throws Exception {
|
||||
int numQueries = scaledRandomIntBetween(32, 256);
|
||||
for (int i = 0; i < numQueries; i++) {
|
||||
String id = Integer.toString(i);
|
||||
Query query;
|
||||
List<Function<String, Query>> queries = new ArrayList<>();
|
||||
queries.add((id) -> new PrefixQuery(new Term("field", id)));
|
||||
queries.add((id) -> new WildcardQuery(new Term("field", id + "*")));
|
||||
queries.add((id) -> new CustomQuery(new Term("field", id)));
|
||||
queries.add((id) -> new SpanTermQuery(new Term("field", id)));
|
||||
queries.add((id) -> new TermQuery(new Term("field", id)));
|
||||
queries.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
return builder.build();
|
||||
});
|
||||
queries.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
|
||||
if (randomBoolean()) {
|
||||
query = new PrefixQuery(new Term("field", id));
|
||||
} else if (randomBoolean()) {
|
||||
query = new WildcardQuery(new Term("field", id + "*"));
|
||||
} else if (randomBoolean()) {
|
||||
query = new CustomQuery(new Term("field", id + "*"));
|
||||
} else if (randomBoolean()) {
|
||||
query = new SpanTermQuery(new Term("field", id));
|
||||
} else {
|
||||
query = new TermQuery(new Term("field", id));
|
||||
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
addPercolatorQuery(id, query);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
queries.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
queries.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
queries.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
queries.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
|
||||
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
|
||||
return builder.build();
|
||||
});
|
||||
queries.add((id) -> new MatchAllDocsQuery());
|
||||
queries.add((id) -> new MatchNoDocsQuery("no reason at all"));
|
||||
|
||||
int numDocs = randomIntBetween(queries.size(), queries.size() * 3);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
String id = Integer.toString(i);
|
||||
addPercolatorQuery(id, queries.get(i % queries.size()).apply(id));
|
||||
}
|
||||
|
||||
indexWriter.close();
|
||||
directoryReader = DirectoryReader.open(directory);
|
||||
IndexSearcher shardSearcher = newSearcher(directoryReader);
|
||||
// Disable query cache, because ControlQuery cannot be cached...
|
||||
shardSearcher.setQueryCache(null);
|
||||
|
||||
for (int i = 0; i < numQueries; i++) {
|
||||
MemoryIndex memoryIndex = new MemoryIndex();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
String id = Integer.toString(i);
|
||||
MemoryIndex memoryIndex = new MemoryIndex();
|
||||
memoryIndex.addField("field", id, new WhitespaceAnalyzer());
|
||||
duelRun(memoryIndex, shardSearcher);
|
||||
}
|
||||
|
||||
MemoryIndex memoryIndex = new MemoryIndex();
|
||||
memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
|
||||
duelRun(memoryIndex, shardSearcher);
|
||||
// Empty percolator doc:
|
||||
memoryIndex = new MemoryIndex();
|
||||
duelRun(memoryIndex, shardSearcher);
|
||||
}
|
||||
|
||||
public void testDuelSpecificQueries() throws Exception {
|
||||
|
@ -312,6 +381,8 @@ public class PercolateQueryTests extends ESTestCase {
|
|||
indexWriter.close();
|
||||
directoryReader = DirectoryReader.open(directory);
|
||||
IndexSearcher shardSearcher = newSearcher(directoryReader);
|
||||
// Disable query cache, because ControlQuery cannot be cached...
|
||||
shardSearcher.setQueryCache(null);
|
||||
|
||||
MemoryIndex memoryIndex = new MemoryIndex();
|
||||
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
|
||||
|
@ -332,33 +403,33 @@ public class PercolateQueryTests extends ESTestCase {
|
|||
}
|
||||
|
||||
private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
|
||||
boolean requireScore = randomBoolean();
|
||||
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
|
||||
PercolateQuery.Builder builder1 = new PercolateQuery.Builder(
|
||||
PercolateQuery.Builder builder = new PercolateQuery.Builder(
|
||||
"docType",
|
||||
queryStore,
|
||||
new BytesArray("{}"),
|
||||
percolateSearcher
|
||||
);
|
||||
// enables the optimization that prevents queries from being evaluated that don't match
|
||||
builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
|
||||
TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10);
|
||||
builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
|
||||
Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build());
|
||||
TopDocs topDocs = shardSearcher.search(query, 10);
|
||||
|
||||
PercolateQuery.Builder builder2 = new PercolateQuery.Builder(
|
||||
"docType",
|
||||
queryStore,
|
||||
new BytesArray("{}"),
|
||||
percolateSearcher
|
||||
);
|
||||
builder2.setPercolateTypeQuery(new MatchAllDocsQuery());
|
||||
TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10);
|
||||
assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits));
|
||||
assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length));
|
||||
for (int j = 0; j < topDocs1.scoreDocs.length; j++) {
|
||||
assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc));
|
||||
assertThat(topDocs1.scoreDocs[j].score, equalTo(topDocs2.scoreDocs[j].score));
|
||||
Explanation explain1 = shardSearcher.explain(builder1.build(), topDocs1.scoreDocs[j].doc);
|
||||
Explanation explain2 = shardSearcher.explain(builder2.build(), topDocs2.scoreDocs[j].doc);
|
||||
assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
|
||||
Query controlQuery = new ControlQuery(memoryIndex, queryStore);
|
||||
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
|
||||
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
|
||||
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
|
||||
assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
|
||||
for (int j = 0; j < topDocs.scoreDocs.length; j++) {
|
||||
assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
|
||||
assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
|
||||
if (requireScore) {
|
||||
Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
|
||||
Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
|
||||
assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
|
||||
assertThat(explain1.getValue(), equalTo(explain2.getValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -391,4 +462,89 @@ public class PercolateQueryTests extends ESTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Brute-force reference query for the duel tests: for every document in a segment it
 * loads the stored percolator query from the {@code queryStore} and runs it against the
 * {@code memoryIndex}; a document matches when that search returns a non-zero score.
 * No candidate pre-filtering is applied, so every stored query is evaluated.
 */
private final static class ControlQuery extends Query {
|
||||
|
||||
// In-memory index holding the document that is being percolated.
private final MemoryIndex memoryIndex;
|
||||
// Gives per-segment access to the percolator query stored for each doc id.
private final PercolateQuery.QueryStore queryStore;
|
||||
|
||||
private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
|
||||
this.memoryIndex = memoryIndex;
|
||||
this.queryStore = queryStore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
// Score of the most recently matched document. It is written by match() below (only
// when needsScores is true) and read back by the wrapping scorer's score(); otherwise
// it keeps its default value of 0.
float _score;
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
// Build a fresh scorer and advance it to the requested doc; landing exactly on
// that doc means the stored query matched the memory index.
Scorer scorer = scorer(context);
|
||||
if (scorer != null) {
|
||||
int result = scorer.iterator().advance(doc);
|
||||
if (result == doc) {
|
||||
return Explanation.match(scorer.score(), "ControlQuery");
|
||||
}
|
||||
}
|
||||
return Explanation.noMatch("ControlQuery");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "weight(" + ControlQuery.this + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
// Start from every doc id in the segment; the filter below decides which ones match.
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
|
||||
PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
|
||||
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
|
||||
|
||||
@Override
|
||||
protected boolean match(int doc) {
|
||||
try {
|
||||
// Run the percolator query stored for this doc against the memory index.
Query query = leaf.getQuery(doc);
|
||||
float score = memoryIndex.search(query);
|
||||
// A score of exactly 0 is treated as "no match".
if (score != 0f) {
|
||||
if (needsScores) {
|
||||
// Remember the score so the wrapping scorer can report it for this doc.
_score = score;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// match() declares no checked exceptions, so the IOException is rethrown unchecked.
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
// Wrap the constant-score scorer so that score() reports the value captured in match()
// instead of the constant score.
return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return _score;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "control{" + field + "}";
|
||||
}
|
||||
|
||||
// NOTE(review): equals/hashCode delegate to sameClassAs/classHash only, so instances
// presumably compare equal regardless of memoryIndex and queryStore - confirm this is
// intended; the tests disable the query cache for searches that use this query.
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return sameClassAs(obj);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return classHash();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -49,6 +49,8 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
|
|||
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
|
||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
@ -71,7 +73,9 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
indexService = createIndex("test", Settings.EMPTY);
|
||||
mapperService = indexService.mapperService();
|
||||
|
||||
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
|
||||
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
|
||||
.startObject("properties")
|
||||
.startObject("field").field("type", "text").endObject()
|
||||
.startObject("number_field").field("type", "long").endObject()
|
||||
.startObject("date_field").field("type", "date").endObject()
|
||||
|
@ -96,20 +100,21 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
.field(fieldName, queryBuilder)
|
||||
.endObject().bytes());
|
||||
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(0));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
|
||||
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
|
||||
assertQueryBuilder(qbSource, queryBuilder);
|
||||
|
||||
// add a query from which we don't extract terms
|
||||
queryBuilder = matchAllQuery();
|
||||
queryBuilder = rangeQuery("field").from("a").to("z");
|
||||
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
|
||||
.field(fieldName, queryBuilder)
|
||||
.endObject().bytes());
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(1));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName())[0].binaryValue(), equalTo(new BytesRef()));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
|
||||
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
|
||||
qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
|
||||
|
@ -195,6 +200,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
public void testMultiplePercolatorFields() throws Exception {
|
||||
String typeName = "another_type";
|
||||
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
|
||||
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
|
||||
.startObject("properties")
|
||||
.startObject("query_field1").field("type", "percolator").endObject()
|
||||
.startObject("query_field2").field("type", "percolator").endObject()
|
||||
|
@ -209,7 +215,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
.field("query_field2", queryBuilder)
|
||||
.endObject().bytes()
|
||||
);
|
||||
assertThat(doc.rootDoc().getFields().size(), equalTo(22)); // also includes all other meta fields
|
||||
assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes _uid (1), type (2), source (1)
|
||||
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
|
||||
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
|
||||
|
||||
|
@ -221,6 +227,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
public void testNestedPercolatorField() throws Exception {
|
||||
String typeName = "another_type";
|
||||
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
|
||||
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
|
||||
.startObject("properties")
|
||||
.startObject("object_field")
|
||||
.field("type", "object")
|
||||
|
@ -238,7 +245,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
.field("query_field", queryBuilder)
|
||||
.endObject().endObject().bytes()
|
||||
);
|
||||
assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
|
||||
assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
|
||||
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
|
||||
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
|
||||
|
||||
|
@ -249,7 +256,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
.endArray()
|
||||
.endObject().bytes()
|
||||
);
|
||||
assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
|
||||
assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
|
||||
queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
|
||||
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
|
||||
|
||||
|
|
|
@ -397,12 +397,16 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
|
|||
.addMapping("employee", mapping)
|
||||
.addMapping("queries", "query", "type=percolator")
|
||||
);
|
||||
client().prepareIndex("test", "queries", "q").setSource(jsonBuilder().startObject()
|
||||
client().prepareIndex("test", "queries", "q1").setSource(jsonBuilder().startObject()
|
||||
.field("query", QueryBuilders.nestedQuery("employee",
|
||||
QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
|
||||
).endObject())
|
||||
.setRefreshPolicy(IMMEDIATE)
|
||||
.get();
|
||||
// this query should never match as it doesn't use nested query:
|
||||
client().prepareIndex("test", "queries", "q2").setSource(jsonBuilder().startObject()
|
||||
.field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
|
||||
.get();
|
||||
client().admin().indices().prepareRefresh().get();
|
||||
|
||||
SearchResponse response = client().prepareSearch()
|
||||
.setQuery(new PercolateQueryBuilder("query", "employee",
|
||||
|
@ -413,9 +417,10 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
|
|||
.startObject().field("name", "tony stark").endObject()
|
||||
.endArray()
|
||||
.endObject().bytes()))
|
||||
.addSort("_doc", SortOrder.ASC)
|
||||
.get();
|
||||
assertHitCount(response, 1);
|
||||
assertThat(response.getHits().getAt(0).getId(), equalTo("q"));
|
||||
assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
|
||||
|
||||
response = client().prepareSearch()
|
||||
.setQuery(new PercolateQueryBuilder("query", "employee",
|
||||
|
@ -426,12 +431,14 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
|
|||
.startObject().field("name", "tony stark").endObject()
|
||||
.endArray()
|
||||
.endObject().bytes()))
|
||||
.addSort("_doc", SortOrder.ASC)
|
||||
.get();
|
||||
assertHitCount(response, 0);
|
||||
|
||||
response = client().prepareSearch()
|
||||
.setQuery(new PercolateQueryBuilder("query", "employee",
|
||||
XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject().bytes()))
|
||||
.addSort("_doc", SortOrder.ASC)
|
||||
.get();
|
||||
assertHitCount(response, 0);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue