percolator: Don't verify candidate matches with MemoryIndex that are verified matches

If we don't care about scoring, then for certain queries we can be certain that any candidate match is in fact a real match, so verifying those queries with the MemoryIndex can be skipped.
parent 6d5b4a78fe
commit 599a548998
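The idea, as a minimal self-contained sketch (not code from this commit; the class and method names below are invented for illustration): term extraction reports, per query, the extracted terms plus a flag that says whether a candidate match selected by those terms is already guaranteed to be a real match.

import java.util.Collections;
import java.util.Set;

final class VerifiedExtractionSketch {

    // Mirrors the Result class introduced in ExtractQueryTermsService below:
    // verified == true means a document selected by the extracted terms is
    // guaranteed to match the original query, so the MemoryIndex check can
    // be skipped for it.
    static final class Result {
        final boolean verified;
        final Set<String> terms;

        Result(boolean verified, Set<String> terms) {
            this.verified = verified;
            this.terms = terms;
        }
    }

    // A single term query is exactly its term: candidate match == real match.
    static Result termQuery(String field, String value) {
        return new Result(true, Collections.singleton(field + "\0" + value));
    }

    // A phrase query only contributes its longest term; a document containing
    // that term may still not contain the phrase, so it stays unverified.
    static Result phraseQuery(String field, String longestTerm) {
        return new Result(false, Collections.singleton(field + "\0" + longestTerm));
    }
}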
LuceneTests.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.common.lucene;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
@@ -27,6 +28,8 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoDeletionPolicy;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -35,9 +38,11 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MMapDirectory;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
@@ -49,9 +54,6 @@ import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-/**
- *
- */
 public class LuceneTests extends ESTestCase {
     public void testWaitForIndex() throws Exception {
         final MockDirectoryWrapper dir = newMockDirectory();
@@ -355,6 +357,45 @@
         dir.close();
     }
 
+    public void testAsSequentialAccessBits() throws Exception {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
+
+        Document doc = new Document();
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+
+        doc = new Document();
+        w.addDocument(doc);
+
+        doc = new Document();
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+
+
+        try (DirectoryReader reader = DirectoryReader.open(w)) {
+            IndexSearcher searcher = newSearcher(reader);
+            Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false);
+            assertEquals(1, reader.leaves().size());
+            LeafReaderContext leafReaderContext = reader.leaves().get(0);
+            Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext));
+
+            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1));
+            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc()));
+            assertTrue(bits.get(0));
+            assertTrue(bits.get(0));
+            assertFalse(bits.get(1));
+            assertFalse(bits.get(1));
+            expectThrows(IllegalArgumentException.class, () -> bits.get(0));
+            assertTrue(bits.get(2));
+            assertTrue(bits.get(2));
+            expectThrows(IllegalArgumentException.class, () -> bits.get(1));
+        }
+
+        w.close();
+        dir.close();
+    }
+
     /**
      * Test that the "unmap hack" is detected as supported by lucene.
      * This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038
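The new testAsSequentialAccessBits test above pins down the contract of Lucene.asSequentialAccessBits: bounds-checked access, but only at non-decreasing doc ids. A hedged sketch of that contract (an illustration under that assumption, not Lucene's or Elasticsearch's actual implementation):

import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;

final class SequentialAccessBitsSketch implements Bits {
    private final int maxDoc;
    private final DocIdSetIterator iterator; // produces doc ids in increasing order
    private int previous = -1;

    SequentialAccessBitsSketch(int maxDoc, DocIdSetIterator iterator) {
        this.maxDoc = maxDoc;
        this.iterator = iterator;
    }

    @Override
    public boolean get(int index) {
        if (index < 0 || index >= maxDoc) {
            throw new IndexOutOfBoundsException("index: " + index);
        }
        if (index < previous) {
            // forward-only: revisiting an earlier doc id is a programming error
            throw new IllegalArgumentException("docs must be consumed in order");
        }
        previous = index;
        try {
            int doc = iterator.docID();
            if (doc < index) {
                doc = iterator.advance(index);
            }
            return doc == index;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    @Override
    public int length() {
        return maxDoc;
    }
}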
ExtractQueryTermsService.java

@@ -53,10 +53,13 @@ import org.elasticsearch.index.mapper.ParseContext;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.function.Function;
 
 /**
  * Utility to extract query terms from queries and create queries from documents.
@@ -64,64 +67,179 @@ import java.util.Set;
 public final class ExtractQueryTermsService {
 
     private static final byte FIELD_VALUE_SEPARATOR = 0;  // nul code point
+    public static final String EXTRACTION_COMPLETE = "complete";
+    public static final String EXTRACTION_PARTIAL = "partial";
+    public static final String EXTRACTION_FAILED = "failed";
+
+    static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
+
+    static {
+        Map<Class<? extends Query>, Function<Query, Result>> map = new HashMap<>(16);
+        map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
+        map.put(ConstantScoreQuery.class, constantScoreQuery());
+        map.put(BoostQuery.class, boostQuery());
+        map.put(TermQuery.class, termQuery());
+        map.put(TermsQuery.class, termsQuery());
+        map.put(CommonTermsQuery.class, commonTermsQuery());
+        map.put(BlendedTermQuery.class, blendedTermQuery());
+        map.put(PhraseQuery.class, phraseQuery());
+        map.put(SpanTermQuery.class, spanTermQuery());
+        map.put(SpanNearQuery.class, spanNearQuery());
+        map.put(SpanOrQuery.class, spanOrQuery());
+        map.put(SpanFirstQuery.class, spanFirstQuery());
+        map.put(SpanNotQuery.class, spanNotQuery());
+        map.put(BooleanQuery.class, booleanQuery());
+        map.put(DisjunctionMaxQuery.class, disjunctionMaxQuery());
+        queryProcessors = Collections.unmodifiableMap(map);
+    }
 
     private ExtractQueryTermsService() {
     }
 
     /**
      * Extracts all terms from the specified query and adds it to the specified document.
+     *
      * @param query                 The query to extract terms from
      * @param document              The document to add the extracted terms to
      * @param queryTermsFieldField  The field in the document holding the extracted terms
-     * @param unknownQueryField     The field used to mark a document that not all query terms could be extracted.
-     *                              For example the query contained an unsupported query (e.g. WildcardQuery).
-     * @param fieldType The field type for the query metadata field
+     * @param extractionResultField The field contains whether query term extraction was successful, partial or
+     *                              failed. (For example the query contained an unsupported query (e.g. WildcardQuery)
+     *                              then query extraction would fail)
+     * @param fieldType             The field type for the query metadata field
      */
     public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField,
-                                         String unknownQueryField, FieldType fieldType) {
-        Set<Term> queryTerms;
+                                         String extractionResultField, FieldType fieldType) {
+        Result result;
         try {
-            queryTerms = extractQueryTerms(query);
+            result = extractQueryTerms(query);
         } catch (UnsupportedQueryException e) {
-            document.add(new Field(unknownQueryField, new BytesRef(), fieldType));
+            document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType));
             return;
         }
-        for (Term term : queryTerms) {
+        for (Term term : result.terms) {
             BytesRefBuilder builder = new BytesRefBuilder();
             builder.append(new BytesRef(term.field()));
             builder.append(FIELD_VALUE_SEPARATOR);
             builder.append(term.bytes());
             document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType));
         }
+        if (result.verified) {
+            document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType));
+        } else {
+            document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType));
+        }
+    }
+
+    /**
+     * Creates a terms query containing all terms from all fields of the specified index reader.
+     */
+    public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
+                                              Term... optionalTerms) throws IOException {
+        Objects.requireNonNull(queryMetadataField);
+
+        List<Term> extractedTerms = new ArrayList<>();
+        Collections.addAll(extractedTerms, optionalTerms);
+
+        Fields fields = MultiFields.getFields(indexReader);
+        for (String field : fields) {
+            Terms terms = fields.terms(field);
+            if (terms == null) {
+                continue;
+            }
+
+            BytesRef fieldBr = new BytesRef(field);
+            TermsEnum tenum = terms.iterator();
+            for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
+                BytesRefBuilder builder = new BytesRefBuilder();
+                builder.append(fieldBr);
+                builder.append(FIELD_VALUE_SEPARATOR);
+                builder.append(term);
+                extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
+            }
+        }
+        return new TermsQuery(extractedTerms);
     }
 
     /**
      * Extracts all query terms from the provided query and adds it to specified list.
-     *
+     * <p>
      * From boolean query with no should clauses or phrase queries only the longest term are selected,
      * since that those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored.
-     *
+     * <p>
      * If from part of the query, no query terms can be extracted then term extraction is stopped and
      * an UnsupportedQueryException is thrown.
      */
-    static Set<Term> extractQueryTerms(Query query) {
-        if (query instanceof MatchNoDocsQuery) {
-            // no terms to extract as this query matches no docs
-            return Collections.emptySet();
-        } else if (query instanceof TermQuery) {
-            return Collections.singleton(((TermQuery) query).getTerm());
-        } else if (query instanceof TermsQuery) {
-            Set<Term> terms = new HashSet<>();
+    static Result extractQueryTerms(Query query) {
+        Class queryClass = query.getClass();
+        if (queryClass.isAnonymousClass()) {
+            // Sometimes queries have anonymous classes in that case we need the direct super class.
+            // (for example blended term query)
+            queryClass = queryClass.getSuperclass();
+        }
+        Function<Query, Result> queryProcessor = queryProcessors.get(queryClass);
+        if (queryProcessor != null) {
+            return queryProcessor.apply(query);
+        } else {
+            throw new UnsupportedQueryException(query);
+        }
+    }
+
+    static Function<Query, Result> matchNoDocsQuery() {
+        return (query -> new Result(true, Collections.emptySet()));
+    }
+
+    static Function<Query, Result> constantScoreQuery() {
+        return query -> {
+            Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
+            return extractQueryTerms(wrappedQuery);
+        };
+    }
+
+    static Function<Query, Result> boostQuery() {
+        return query -> {
+            Query wrappedQuery = ((BoostQuery) query).getQuery();
+            return extractQueryTerms(wrappedQuery);
+        };
+    }
+
+    static Function<Query, Result> termQuery() {
+        return (query -> {
+            TermQuery termQuery = (TermQuery) query;
+            return new Result(true, Collections.singleton(termQuery.getTerm()));
+        });
+    }
+
+    static Function<Query, Result> termsQuery() {
+        return query -> {
             TermsQuery termsQuery = (TermsQuery) query;
+            Set<Term> terms = new HashSet<>();
             PrefixCodedTerms.TermIterator iterator = termsQuery.getTermData().iterator();
             for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                 terms.add(new Term(iterator.field(), term));
             }
-            return terms;
-        } else if (query instanceof PhraseQuery) {
+            return new Result(true, terms);
+        };
+    }
+
+    static Function<Query, Result> commonTermsQuery() {
+        return query -> {
+            List<Term> terms = ((CommonTermsQuery) query).getTerms();
+            return new Result(false, new HashSet<>(terms));
+        };
+    }
+
+    static Function<Query, Result> blendedTermQuery() {
+        return query -> {
+            List<Term> terms = ((BlendedTermQuery) query).getTerms();
+            return new Result(true, new HashSet<>(terms));
+        };
+    }
+
+    static Function<Query, Result> phraseQuery() {
+        return query -> {
             Term[] terms = ((PhraseQuery) query).getTerms();
             if (terms.length == 0) {
-                return Collections.emptySet();
+                return new Result(true, Collections.emptySet());
             }
 
             // the longest term is likely to be the rarest,
@@ -132,19 +250,76 @@ public final class ExtractQueryTermsService {
                     longestTerm = term;
                 }
             }
-            return Collections.singleton(longestTerm);
-        } else if (query instanceof BooleanQuery) {
-            List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
-            boolean hasRequiredClauses = false;
+            return new Result(false, Collections.singleton(longestTerm));
+        };
+    }
+
+    static Function<Query, Result> spanTermQuery() {
+        return query -> {
+            Term term = ((SpanTermQuery) query).getTerm();
+            return new Result(true, Collections.singleton(term));
+        };
+    }
+
+    static Function<Query, Result> spanNearQuery() {
+        return query -> {
+            Set<Term> bestClauses = null;
+            SpanNearQuery spanNearQuery = (SpanNearQuery) query;
+            for (SpanQuery clause : spanNearQuery.getClauses()) {
+                Result temp = extractQueryTerms(clause);
+                bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
+            }
+            return new Result(false, bestClauses);
+        };
+    }
+
+    static Function<Query, Result> spanOrQuery() {
+        return query -> {
+            Set<Term> terms = new HashSet<>();
+            SpanOrQuery spanOrQuery = (SpanOrQuery) query;
+            for (SpanQuery clause : spanOrQuery.getClauses()) {
+                terms.addAll(extractQueryTerms(clause).terms);
+            }
+            return new Result(false, terms);
+        };
+    }
+
+    static Function<Query, Result> spanNotQuery() {
+        return query -> {
+            Result result = extractQueryTerms(((SpanNotQuery) query).getInclude());
+            return new Result(false, result.terms);
+        };
+    }
+
+    static Function<Query, Result> spanFirstQuery() {
+        return query -> {
+            Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch());
+            return new Result(false, result.terms);
+        };
+    }
+
+    static Function<Query, Result> booleanQuery() {
+        return query -> {
+            BooleanQuery bq = (BooleanQuery) query;
+            List<BooleanClause> clauses = bq.clauses();
+            int minimumShouldMatch = bq.getMinimumNumberShouldMatch();
+            int numRequiredClauses = 0;
+            int numOptionalClauses = 0;
+            int numProhibitedClauses = 0;
             for (BooleanClause clause : clauses) {
                 if (clause.isRequired()) {
-                    hasRequiredClauses = true;
-                    break;
+                    numRequiredClauses++;
+                }
+                if (clause.isProhibited()) {
+                    numProhibitedClauses++;
+                }
+                if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
+                    numOptionalClauses++;
                 }
             }
-            if (hasRequiredClauses) {
-                UnsupportedQueryException uqe = null;
+            if (numRequiredClauses > 0) {
                 Set<Term> bestClause = null;
+                UnsupportedQueryException uqe = null;
                 for (BooleanClause clause : clauses) {
                     if (clause.isRequired() == false) {
                         // skip must_not clauses, we don't need to remember the things that do *not* match...
@@ -153,77 +328,56 @@ public final class ExtractQueryTermsService {
                         continue;
                     }
 
-                    Set<Term> temp;
+                    Result temp;
                     try {
                         temp = extractQueryTerms(clause.getQuery());
                     } catch (UnsupportedQueryException e) {
                         uqe = e;
                         continue;
                     }
-                    bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
+                    bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause);
                 }
                 if (bestClause != null) {
-                    return bestClause;
+                    return new Result(false, bestClause);
                 } else {
                     if (uqe != null) {
+                        // we're unable to select the best clause and an exception occurred, so we bail
                         throw uqe;
+                    } else {
+                        // We didn't find a clause and no exception occurred, so this bq only contained MatchNoDocsQueries,
+                        return new Result(true, Collections.emptySet());
                     }
-                    return Collections.emptySet();
                 }
             } else {
-                Set<Term> terms = new HashSet<>();
+                List<Query> disjunctions = new ArrayList<>(numOptionalClauses);
                 for (BooleanClause clause : clauses) {
-                    if (clause.isProhibited()) {
-                        // we don't need to remember the things that do *not* match...
-                        continue;
+                    if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
+                        disjunctions.add(clause.getQuery());
                     }
-                    terms.addAll(extractQueryTerms(clause.getQuery()));
                 }
-                return terms;
+                return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0);
             }
-        } else if (query instanceof ConstantScoreQuery) {
-            Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
-            return extractQueryTerms(wrappedQuery);
-        } else if (query instanceof BoostQuery) {
-            Query wrappedQuery = ((BoostQuery) query).getQuery();
-            return extractQueryTerms(wrappedQuery);
-        } else if (query instanceof CommonTermsQuery) {
-            List<Term> terms = ((CommonTermsQuery) query).getTerms();
-            return new HashSet<>(terms);
-        } else if (query instanceof BlendedTermQuery) {
-            List<Term> terms = ((BlendedTermQuery) query).getTerms();
-            return new HashSet<>(terms);
-        } else if (query instanceof DisjunctionMaxQuery) {
+        };
+    }
+
+    static Function<Query, Result> disjunctionMaxQuery() {
+        return query -> {
             List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
-            Set<Term> terms = new HashSet<>();
-            for (Query disjunct : disjuncts) {
-                terms.addAll(extractQueryTerms(disjunct));
+            return handleDisjunction(disjuncts, 1, false);
+        };
+    }
+
+    static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
+        boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
+        Set<Term> terms = new HashSet<>();
+        for (Query disjunct : disjunctions) {
+            Result subResult = extractQueryTerms(disjunct);
+            if (subResult.verified == false) {
+                verified = false;
             }
-            return terms;
-        } else if (query instanceof SpanTermQuery) {
-            return Collections.singleton(((SpanTermQuery) query).getTerm());
-        } else if (query instanceof SpanNearQuery) {
-            Set<Term> bestClause = null;
-            SpanNearQuery spanNearQuery = (SpanNearQuery) query;
-            for (SpanQuery clause : spanNearQuery.getClauses()) {
-                Set<Term> temp = extractQueryTerms(clause);
-                bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
-            }
-            return bestClause;
-        } else if (query instanceof SpanOrQuery) {
-            Set<Term> terms = new HashSet<>();
-            SpanOrQuery spanOrQuery = (SpanOrQuery) query;
-            for (SpanQuery clause : spanOrQuery.getClauses()) {
-                terms.addAll(extractQueryTerms(clause));
-            }
-            return terms;
-        } else if (query instanceof SpanFirstQuery) {
-            return extractQueryTerms(((SpanFirstQuery)query).getMatch());
-        } else if (query instanceof SpanNotQuery) {
-            return extractQueryTerms(((SpanNotQuery) query).getInclude());
-        } else {
-            throw new UnsupportedQueryException(query);
+            terms.addAll(subResult.terms);
         }
+        return new Result(verified, terms);
     }
 
     static Set<Term> selectTermListWithTheLongestShortestTerm(Set<Term> terms1, Set<Term> terms2) {
@@ -243,7 +397,7 @@ public final class ExtractQueryTermsService {
         }
     }
 
-    private static int minTermLength(Set<Term> terms) {
+    static int minTermLength(Set<Term> terms) {
        int min = Integer.MAX_VALUE;
        for (Term term : terms) {
            min = Math.min(min, term.bytes().length);
@@ -251,40 +405,22 @@ public final class ExtractQueryTermsService {
         return min;
     }
 
-    /**
-     * Creates a boolean query with a should clause for each term on all fields of the specified index reader.
-     */
-    public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
-                                              String unknownQueryField) throws IOException {
-        Objects.requireNonNull(queryMetadataField);
-        Objects.requireNonNull(unknownQueryField);
+    static class Result {
 
-        List<Term> extractedTerms = new ArrayList<>();
-        extractedTerms.add(new Term(unknownQueryField));
-        Fields fields = MultiFields.getFields(indexReader);
-        for (String field : fields) {
-            Terms terms = fields.terms(field);
-            if (terms == null) {
-                continue;
-            }
+        final Set<Term> terms;
+        final boolean verified;
 
-            BytesRef fieldBr = new BytesRef(field);
-            TermsEnum tenum = terms.iterator();
-            for (BytesRef term = tenum.next(); term != null ; term = tenum.next()) {
-                BytesRefBuilder builder = new BytesRefBuilder();
-                builder.append(fieldBr);
-                builder.append(FIELD_VALUE_SEPARATOR);
-                builder.append(term);
-                extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
-            }
+        Result(boolean verified, Set<Term> terms) {
+            this.terms = terms;
+            this.verified = verified;
         }
-        return new TermsQuery(extractedTerms);
+
     }
 
     /**
      * Exception indicating that none or some query terms couldn't extracted from a percolator query.
     */
-    public static class UnsupportedQueryException extends RuntimeException {
+    static class UnsupportedQueryException extends RuntimeException {
 
         private final Query unsupportedQuery;
 
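Two hedged examples (not part of the commit) of how handleDisjunction above decides the verified flag for a pure disjunction:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

final class DisjunctionVerificationExamples {

    // a OR b over plain term queries: any document containing a or b really
    // matches, so with minimumShouldMatch <= 1 and no prohibited clauses the
    // extraction result is verified and the MemoryIndex can be skipped.
    static BooleanQuery verifiedDisjunction() {
        return new BooleanQuery.Builder()
                .add(new TermQuery(new Term("field", "a")), Occur.SHOULD)
                .add(new TermQuery(new Term("field", "b")), Occur.SHOULD)
                .build();
    }

    // Requiring two of the optional clauses breaks the guarantee: a candidate
    // containing only "a" is selected by the extracted terms but does not
    // match, so the result is unverified and must go through the MemoryIndex.
    static BooleanQuery unverifiedDisjunction() {
        return new BooleanQuery.Builder()
                .setMinimumNumberShouldMatch(2)
                .add(new TermQuery(new Term("field", "a")), Occur.SHOULD)
                .add(new TermQuery(new Term("field", "b")), Occur.SHOULD)
                .build();
    }
}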
PercolateQuery.java

@@ -28,12 +28,15 @@ import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 
 import java.io.IOException;
 import java.util.Objects;
@@ -54,6 +57,7 @@ public final class PercolateQuery extends Query implements Accountable {
         private final IndexSearcher percolatorIndexSearcher;
 
         private Query queriesMetaDataQuery;
+        private Query verifiedQueriesQuery = new MatchNoDocsQuery("");
         private Query percolateTypeQuery;
 
         /**
@@ -64,21 +68,32 @@ public final class PercolateQuery extends Query implements Accountable {
          */
         public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) {
             this.docType = Objects.requireNonNull(docType);
+            this.queryStore = Objects.requireNonNull(queryStore);
             this.documentSource = Objects.requireNonNull(documentSource);
             this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
-            this.queryStore = Objects.requireNonNull(queryStore);
         }
 
         /**
          * Optionally sets a query that reduces the number of queries to percolate based on extracted terms from
          * the document to be percolated.
-         *
-         * @param extractedTermsFieldName The name of the field to get the extracted terms from
-         * @param unknownQueryFieldname   The field used to mark documents whose queries couldn't all get extracted
+         * @param extractedTermsFieldName The name of the field to get the extracted terms from
+         * @param extractionResultField   The field to indicate for a document whether query term extraction was complete,
+         *                                partial or failed. If query extraction was complete, the MemoryIndex doesn't
          */
-        public void extractQueryTermsQuery(String extractedTermsFieldName, String unknownQueryFieldname) throws IOException {
+        public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException {
+            // We can only skip the MemoryIndex verification when percolating a single document.
+            // When the document being percolated contains a nested object field then the MemoryIndex contains multiple
+            // documents. In this case the term query that indicates whether memory index verification can be skipped
+            // can incorrectly indicate that non nested queries would match, while their nested variants would not.
+            if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) {
+                this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE));
+            }
             this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery(
-                    percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName, unknownQueryFieldname
+                    percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName,
+                    // include extractionResultField:failed, because docs with this term have no extractedTermsField
+                    // and otherwise we would fail to return these docs. Docs that failed query term extraction
+                    // always need to be verified by MemoryIndex:
+                    new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED)
             );
         }
 
@@ -94,14 +109,15 @@ public final class PercolateQuery extends Query implements Accountable {
                 throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata");
             }
             // The query that selects which percolator queries will be evaluated by MemoryIndex:
-            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder();
             if (percolateTypeQuery != null) {
-                builder.add(percolateTypeQuery, FILTER);
+                queriesQuery.add(percolateTypeQuery, FILTER);
             }
             if (queriesMetaDataQuery != null) {
-                builder.add(queriesMetaDataQuery, FILTER);
+                queriesQuery.add(queriesMetaDataQuery, FILTER);
             }
-            return new PercolateQuery(docType, queryStore, documentSource, builder.build(), percolatorIndexSearcher);
+            return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher,
+                    verifiedQueriesQuery);
         }
 
     }
@@ -110,22 +126,25 @@ public final class PercolateQuery extends Query implements Accountable {
     private final QueryStore queryStore;
     private final BytesReference documentSource;
     private final Query percolatorQueriesQuery;
+    private final Query verifiedQueriesQuery;
     private final IndexSearcher percolatorIndexSearcher;
 
     private PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
-                           Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher) {
+                           Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) {
         this.documentType = documentType;
         this.documentSource = documentSource;
         this.percolatorQueriesQuery = percolatorQueriesQuery;
         this.queryStore = queryStore;
         this.percolatorIndexSearcher = percolatorIndexSearcher;
+        this.verifiedQueriesQuery = verifiedQueriesQuery;
     }
 
     @Override
     public Query rewrite(IndexReader reader) throws IOException {
         Query rewritten = percolatorQueriesQuery.rewrite(reader);
         if (rewritten != percolatorQueriesQuery) {
-            return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher);
+            return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher,
+                    verifiedQueriesQuery);
         } else {
             return this;
         }
@@ -133,6 +152,7 @@ public final class PercolateQuery extends Query implements Accountable {
 
     @Override
     public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+        final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false);
         final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores);
         return new Weight(this) {
             @Override
@@ -206,6 +226,8 @@ public final class PercolateQuery extends Query implements Accountable {
                     }
                 };
             } else {
+                Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext);
+                Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
                 return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {
 
                     @Override
@@ -214,6 +236,14 @@ public final class PercolateQuery extends Query implements Accountable {
                     }
 
                     boolean matchDocId(int docId) throws IOException {
+                        // We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
+                        // If docId also appears in the verifiedDocsBits then that means during indexing
+                        // we were able to extract all query terms and for this candidate match
+                        // and we determined based on the nature of the query that it is safe to skip
+                        // the MemoryIndex verification.
+                        if (verifiedDocsBits.get(docId)) {
+                            return true;
+                        }
                         Query query = percolatorQueries.getQuery(docId);
                         return query != null && Lucene.exists(percolatorIndexSearcher, query);
                     }
PercolateQueryBuilder.java

@@ -420,10 +420,11 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
                         "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
             }
             PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
+            PercolateQuery.QueryStore queryStore = createStore(pft, context, mapUnmappedFieldsAsString);
             PercolateQuery.Builder builder = new PercolateQuery.Builder(
-                    documentType, createStore(pft, context, mapUnmappedFieldsAsString), document, docSearcher
+                    documentType, queryStore, document, docSearcher
             );
-            builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
+            builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getExtractionResultFieldName());
             return builder.build();
         } else {
             Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, MapperService.PERCOLATOR_LEGACY_TYPE_NAME));
PercolatorFieldMapper.java

@@ -58,7 +58,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType();
 
     public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
-    public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
+    public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
     public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
 
     public static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
@@ -75,15 +75,15 @@ public class PercolatorFieldMapper extends FieldMapper {
             context.path().add(name());
             KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
             ((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType();
-            KeywordFieldMapper unknownQueryField = createExtractQueryFieldBuilder(UNKNOWN_QUERY_FIELD_NAME, context);
-            ((PercolatorFieldType) fieldType).unknownQueryField = unknownQueryField.fieldType();
+            KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
+            ((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType();
             BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
             ((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType();
             context.path().remove();
             setupFieldType(context);
             return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
                     multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
-                    unknownQueryField, queryBuilderField);
+                    extractionResultField, queryBuilderField);
         }
 
         static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
@@ -102,6 +102,7 @@ public class PercolatorFieldMapper extends FieldMapper {
             builder.fieldType().setDocValuesType(DocValuesType.BINARY);
             return builder.build(context);
         }
+
     }
 
     public static class TypeParser implements FieldMapper.TypeParser {
@@ -115,7 +116,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     public static class PercolatorFieldType extends MappedFieldType {
 
         private MappedFieldType queryTermsField;
-        private MappedFieldType unknownQueryField;
+        private MappedFieldType extractionResultField;
         private MappedFieldType queryBuilderField;
 
         public PercolatorFieldType() {
@@ -127,7 +128,7 @@ public class PercolatorFieldMapper extends FieldMapper {
         public PercolatorFieldType(PercolatorFieldType ref) {
             super(ref);
             queryTermsField = ref.queryTermsField;
-            unknownQueryField = ref.unknownQueryField;
+            extractionResultField = ref.extractionResultField;
             queryBuilderField = ref.queryBuilderField;
         }
 
@@ -135,8 +136,8 @@ public class PercolatorFieldMapper extends FieldMapper {
             return queryTermsField.name();
         }
 
-        public String getUnknownQueryFieldName() {
-            return unknownQueryField.name();
+        public String getExtractionResultFieldName() {
+            return extractionResultField.name();
         }
 
         public String getQueryBuilderFieldName() {
@@ -162,17 +163,17 @@ public class PercolatorFieldMapper extends FieldMapper {
     private final boolean mapUnmappedFieldAsString;
     private final QueryShardContext queryShardContext;
     private KeywordFieldMapper queryTermsField;
-    private KeywordFieldMapper unknownQueryField;
+    private KeywordFieldMapper extractionResultField;
     private BinaryFieldMapper queryBuilderField;
 
     public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                  Settings indexSettings, MultiFields multiFields, CopyTo copyTo, QueryShardContext queryShardContext,
-                                 KeywordFieldMapper queryTermsField, KeywordFieldMapper unknownQueryField,
+                                 KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
                                  BinaryFieldMapper queryBuilderField) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         this.queryShardContext = queryShardContext;
         this.queryTermsField = queryTermsField;
-        this.unknownQueryField = unknownQueryField;
+        this.extractionResultField = extractionResultField;
         this.queryBuilderField = queryBuilderField;
         this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
     }
@@ -181,18 +182,18 @@ public class PercolatorFieldMapper extends FieldMapper {
     public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
         PercolatorFieldMapper updated = (PercolatorFieldMapper) super.updateFieldType(fullNameToFieldType);
         KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
-        KeywordFieldMapper unknownQueryUpdated = (KeywordFieldMapper) unknownQueryField.updateFieldType(fullNameToFieldType);
+        KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
         BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
 
-        if (updated == this || queryTermsUpdated == queryTermsField || unknownQueryUpdated == unknownQueryField
-                || queryBuilderUpdated == queryBuilderField) {
+        if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
+                && queryBuilderUpdated == queryBuilderField) {
             return this;
         }
         if (updated == this) {
             updated = (PercolatorFieldMapper) updated.clone();
         }
         updated.queryTermsField = queryTermsUpdated;
-        updated.unknownQueryField = unknownQueryUpdated;
+        updated.extractionResultField = extractionResultUpdated;
         updated.queryBuilderField = queryBuilderUpdated;
         return updated;
     }
@@ -220,7 +221,7 @@ public class PercolatorFieldMapper extends FieldMapper {
         }
 
         Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder);
-        ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), unknownQueryField.name(),
+        ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(),
                 queryTermsField.fieldType());
         return null;
     }
@@ -258,7 +259,7 @@ public class PercolatorFieldMapper extends FieldMapper {
 
     @Override
     public Iterator<Mapper> iterator() {
-        return Arrays.<Mapper>asList(queryTermsField, unknownQueryField, queryBuilderField).iterator();
+        return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField).iterator();
     }
 
     @Override
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.index.PrefixCodedTerms;
|
import org.apache.lucene.index.PrefixCodedTerms;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.memory.MemoryIndex;
|
import org.apache.lucene.index.memory.MemoryIndex;
|
||||||
|
@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TermRangeQuery;
|
import org.apache.lucene.search.TermRangeQuery;
|
||||||
|
@ -44,6 +46,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
|
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
|
||||||
import org.elasticsearch.index.mapper.ParseContext;
|
import org.elasticsearch.index.mapper.ParseContext;
|
||||||
|
import org.elasticsearch.percolator.ExtractQueryTermsService.Result;
|
||||||
import org.elasticsearch.test.ESTestCase;
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,18 +57,22 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
|
||||||
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
|
||||||
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL;
|
||||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
|
||||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
|
||||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
|
||||||
import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.is;
|
||||||
import static org.hamcrest.Matchers.sameInstance;
|
import static org.hamcrest.Matchers.sameInstance;
|
||||||
|
|
||||||
public class ExtractQueryTermsServiceTests extends ESTestCase {
|
public class ExtractQueryTermsServiceTests extends ESTestCase {
|
||||||
|
|
||||||
public final static String QUERY_TERMS_FIELD = "extracted_terms";
|
public final static String QUERY_TERMS_FIELD = "extracted_terms";
|
||||||
public final static String UNKNOWN_QUERY_FIELD = "unknown_query";
|
public final static String EXTRACTION_RESULT_FIELD = "extraction_result";
|
||||||
public static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
|
public final static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
QUERY_TERMS_FIELD_TYPE.setTokenized(false);
|
QUERY_TERMS_FIELD_TYPE.setTokenized(false);
|
||||||
@@ -81,33 +88,41 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         bq.add(termQuery2, BooleanClause.Occur.SHOULD);
 
         ParseContext.Document document = new ParseContext.Document();
-        extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
-        Collections.sort(document.getFields(), (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
-        assertThat(document.getFields().size(), equalTo(2));
-        assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
-        assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
-        assertThat(document.getFields().get(1).name(), equalTo(QUERY_TERMS_FIELD));
-        assertThat(document.getFields().get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
+        extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
+        assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE));
+        List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD)));
+        Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
+        assertThat(fields.size(), equalTo(2));
+        assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD));
+        assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
+        assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD));
+        assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
     }
 
     public void testExtractQueryMetadata_unsupported() {
-        BooleanQuery.Builder bq = new BooleanQuery.Builder();
-        TermQuery termQuery1 = new TermQuery(new Term("field1", "term1"));
-        bq.add(termQuery1, BooleanClause.Occur.SHOULD);
-        TermQuery termQuery2 = new TermQuery(new Term("field2", "term2"));
-        bq.add(termQuery2, BooleanClause.Occur.SHOULD);
-
         TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
         ParseContext.Document document = new ParseContext.Document();
-        extractQueryTerms(query, document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
+        extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
         assertThat(document.getFields().size(), equalTo(1));
-        assertThat(document.getFields().get(0).name(), equalTo(UNKNOWN_QUERY_FIELD));
-        assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo(""));
+        assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED));
+    }
+
+    public void testExtractQueryMetadata_notVerified() {
+        PhraseQuery phraseQuery = new PhraseQuery("field", "term");
+
+        ParseContext.Document document = new ParseContext.Document();
+        extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
+        assertThat(document.getFields().size(), equalTo(2));
+        assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
+        assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
+        assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL));
     }
 
     public void testExtractQueryMetadata_termQuery() {
         TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
-        List<Term> terms = new ArrayList<>(extractQueryTerms(termQuery));
+        Result result = extractQueryTerms(termQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
@@ -115,7 +130,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
     public void testExtractQueryMetadata_termsQuery() {
         TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
-        List<Term> terms = new ArrayList<>(extractQueryTerms(termsQuery));
+        Result result = extractQueryTerms(termsQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field"));
@@ -125,7 +142,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
         // test with different fields
         termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
-        terms = new ArrayList<>(extractQueryTerms(termsQuery));
+        result = extractQueryTerms(termsQuery);
+        assertThat(result.verified, is(true));
+        terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field1"));
@@ -136,7 +155,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
     public void testExtractQueryMetadata_phraseQuery() {
         PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
-        List<Term> terms = new ArrayList<>(extractQueryTerms(phraseQuery));
+        Result result = extractQueryTerms(phraseQuery);
+        assertThat(result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
         assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
@@ -157,7 +178,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
 
         BooleanQuery booleanQuery = builder.build();
-        List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
+        Result result = extractQueryTerms(booleanQuery);
+        assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(3));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -183,7 +206,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
 
         BooleanQuery booleanQuery = builder.build();
-        List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
+        Result result = extractQueryTerms(booleanQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(4));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -204,16 +229,74 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(phraseQuery, BooleanClause.Occur.SHOULD);
 
         BooleanQuery booleanQuery = builder.build();
-        List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
+        Result result = extractQueryTerms(booleanQuery);
+        assertThat(result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
         assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
     }
 
+    public void testExactMatch_booleanQuery() {
+        BooleanQuery.Builder builder = new BooleanQuery.Builder();
+        TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1"));
+        builder.add(termQuery1, BooleanClause.Occur.SHOULD);
+        TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
+        builder.add(termQuery2, BooleanClause.Occur.SHOULD);
+        Result result = extractQueryTerms(builder.build());
+        assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, BooleanClause.Occur.SHOULD);
+        PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2");
+        builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
+        PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4");
+        builder.add(phraseQuery2, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, BooleanClause.Occur.MUST_NOT);
+        builder.add(termQuery2, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
+        builder.add(termQuery1, BooleanClause.Occur.SHOULD);
+        builder.add(termQuery2, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("Minimum match is >= 1, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        result = extractQueryTerms(builder.build());
+        assertThat("Single required clause, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        result = extractQueryTerms(builder.build());
+        assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
+        result = extractQueryTerms(builder.build());
+        assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false));
+    }
 
     public void testExtractQueryMetadata_constantScoreQuery() {
         TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
         ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
-        List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
+        Result result = extractQueryTerms(constantScoreQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -222,7 +305,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
     public void testExtractQueryMetadata_boostQuery() {
         TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
         BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f);
-        List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
+        Result result = extractQueryTerms(constantScoreQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -232,7 +317,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
         commonTermsQuery.add(new Term("_field", "_term1"));
         commonTermsQuery.add(new Term("_field", "_term2"));
-        List<Term> terms = new ArrayList<>(extractQueryTerms(commonTermsQuery));
+        Result result = extractQueryTerms(commonTermsQuery);
+        assertThat(result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field"));
@@ -242,15 +329,17 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
     }
 
     public void testExtractQueryMetadata_blendedTermQuery() {
-        Term[] terms = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
-        BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(terms, false);
-        List<Term> result = new ArrayList<>(extractQueryTerms(commonTermsQuery));
-        Collections.sort(result);
-        assertThat(result.size(), equalTo(2));
-        assertThat(result.get(0).field(), equalTo("_field"));
-        assertThat(result.get(0).text(), equalTo("_term1"));
-        assertThat(result.get(1).field(), equalTo("_field"));
-        assertThat(result.get(1).text(), equalTo("_term2"));
+        Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
+        BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false);
+        Result result = extractQueryTerms(commonTermsQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
+        Collections.sort(terms);
+        assertThat(terms.size(), equalTo(2));
+        assertThat(terms.get(0).field(), equalTo("_field"));
+        assertThat(terms.get(0).text(), equalTo("_term1"));
+        assertThat(terms.get(1).field(), equalTo("_field"));
+        assertThat(terms.get(1).text(), equalTo("_term2"));
     }
 
     public void testExtractQueryMetadata_spanTermQuery() {
@@ -266,8 +355,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         // 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
 
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
-        Set<Term> terms = extractQueryTerms(spanTermQuery1);
-        assertTermsEqual(terms, spanTermQuery1.getTerm());
+        Result result = extractQueryTerms(spanTermQuery1);
+        assertThat(result.verified, is(true));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_spanNearQuery() {
@@ -275,48 +365,109 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
                 .addClause(spanTermQuery1).addClause(spanTermQuery2).build();
-        Set<Term> terms = extractQueryTerms(spanNearQuery);
-        assertTermsEqual(terms, spanTermQuery2.getTerm());
+
+        Result result = extractQueryTerms(spanNearQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery2.getTerm());
     }
 
     public void testExtractQueryMetadata_spanOrQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
-        Set<Term> terms = extractQueryTerms(spanOrQuery);
-        assertTermsEqual(terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
+        Result result = extractQueryTerms(spanOrQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
     }
 
     public void testExtractQueryMetadata_spanFirstQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
-        Set<Term> terms = extractQueryTerms(spanFirstQuery);
-        assertTermsEqual(terms, spanTermQuery1.getTerm());
+        Result result = extractQueryTerms(spanFirstQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_spanNotQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
-        Set<Term> terms = extractQueryTerms(spanNotQuery);
-        assertTermsEqual(terms, spanTermQuery1.getTerm());
+        Result result = extractQueryTerms(spanNotQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_matchNoDocsQuery() {
-        Set<Term> terms = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
-        assertEquals(0, terms.size());
+        Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
+        assertThat(result.verified, is(true));
+        assertEquals(0, result.terms.size());
 
         BooleanQuery.Builder bq = new BooleanQuery.Builder();
         bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
         bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
-        terms = extractQueryTerms(bq.build());
-        assertEquals(0, terms.size());
+        result = extractQueryTerms(bq.build());
+        assertThat(result.verified, is(false));
+        assertEquals(0, result.terms.size());
 
         bq = new BooleanQuery.Builder();
         bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
         bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
-        terms = extractQueryTerms(bq.build());
-        assertTermsEqual(terms, new Term("field", "value"));
+        result = extractQueryTerms(bq.build());
+        assertThat(result.verified, is(true));
+        assertTermsEqual(result.terms, new Term("field", "value"));
+
+        DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
+                Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
+                1f
+        );
+        result = extractQueryTerms(disjunctionMaxQuery);
+        assertThat(result.verified, is(true));
+        assertTermsEqual(result.terms, new Term("field", "value"));
+    }
+
+    public void testExtractQueryMetadata_matchAllDocsQuery() {
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery()));
+
+        BooleanQuery.Builder builder = new BooleanQuery.Builder();
+        builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        Result result = extractQueryTerms(builder.build());
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, new Term("field", "value"));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        BooleanQuery bq1 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        BooleanQuery bq2 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        BooleanQuery bq3 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq3));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        BooleanQuery bq4 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        BooleanQuery bq5 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5));
     }
 
     public void testExtractQueryMetadata_unsupportedQuery() {
@@ -343,8 +494,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
         BooleanQuery bq1 = builder.build();
 
-        Set<Term> terms = extractQueryTerms(bq1);
-        assertTermsEqual(terms, termQuery1.getTerm());
+        Result result = extractQueryTerms(bq1);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, termQuery1.getTerm());
 
         TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
         builder = new BooleanQuery.Builder();
@@ -352,8 +504,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(termQuery2, BooleanClause.Occur.MUST);
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
         bq1 = builder.build();
-        terms = extractQueryTerms(bq1);
-        assertTermsEqual(terms, termQuery2.getTerm());
+        result = extractQueryTerms(bq1);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, termQuery2.getTerm());
 
         builder = new BooleanQuery.Builder();
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
@@ -372,7 +525,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
                 Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
         );
 
-        List<Term> terms = new ArrayList<>(extractQueryTerms(disjunctionMaxQuery));
+        Result result = extractQueryTerms(disjunctionMaxQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
+        Collections.sort(terms);
+        assertThat(terms.size(), equalTo(4));
+        assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
+        assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
+        assertThat(terms.get(1).field(), equalTo(termQuery2.getTerm().field()));
+        assertThat(terms.get(1).bytes(), equalTo(termQuery2.getTerm().bytes()));
+        assertThat(terms.get(2).field(), equalTo(termQuery3.getTerm().field()));
+        assertThat(terms.get(2).bytes(), equalTo(termQuery3.getTerm().bytes()));
+        assertThat(terms.get(3).field(), equalTo(termQuery4.getTerm().field()));
+        assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
+
+        disjunctionMaxQuery = new DisjunctionMaxQuery(
+                Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
+        );
+
+        result = extractQueryTerms(disjunctionMaxQuery);
+        assertThat(result.verified, is(false));
+        terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(4));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -394,7 +567,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
         IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
         TermsQuery query = (TermsQuery)
-                createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD);
+                createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
 
         PrefixCodedTerms terms = query.getTermData();
         assertThat(terms.size(), equalTo(15L));
@@ -413,7 +586,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD);
         assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD);
         assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD);
-        assertTermIterator(termIterator, "", UNKNOWN_QUERY_FIELD);
+        assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
     }
 
     public void testSelectTermsListWithHighestSumOfTermLength() {
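
The boolean-query cases above pin down the rule the extraction service applies: a candidate match is proven ("verified") only for a pure disjunction, i.e. no required (MUST/FILTER) clause, no prohibited (MUST_NOT) clause, a minimum-should-match of at most one, and every sub-clause itself verified. The sketch below restates that rule in plain Lucene terms; VerificationRule and isClauseVerified are illustrative names for this note only, not the production ExtractQueryTermsService code.

// Illustrative restatement of the verification rule exercised by the tests
// above; the class and method names here are hypothetical.
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

final class VerificationRule {

    static boolean isVerified(BooleanQuery query) {
        if (query.getMinimumNumberShouldMatch() > 1) {
            return false; // more than one optional clause has to match
        }
        for (BooleanClause clause : query.clauses()) {
            if (clause.isRequired() || clause.isProhibited()) {
                return false; // any must, filter or must_not clause disables verification
            }
            if (isClauseVerified(clause.getQuery()) == false) {
                return false; // one non-exact clause taints the entire query
            }
        }
        return true; // pure disjunction of verified clauses
    }

    // Hypothetical stand-in for the recursive per-clause check; in the tests
    // a phrase query is the canonical example of a non-verified clause.
    private static boolean isClauseVerified(Query clause) {
        return clause instanceof PhraseQuery == false;
    }
}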

@@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.memory.MemoryIndex;
@@ -37,14 +38,21 @@ import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterScorer;
+import org.apache.lucene.search.FilteredDocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanNotQuery;
@@ -52,6 +60,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.ParseContext;
 import org.elasticsearch.index.mapper.Uid;
@@ -61,8 +70,11 @@ import org.junit.After;
 import org.junit.Before;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.function.Function;
 
 import static org.hamcrest.Matchers.arrayWithSize;
 import static org.hamcrest.Matchers.equalTo;
@@ -72,7 +84,7 @@ public class PercolateQueryTests extends ESTestCase {
 
     public final static String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
     public final static String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
-    public static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
+    public final static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
 
     static {
         EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false);
@@ -247,34 +259,91 @@ public class PercolateQueryTests extends ESTestCase {
     }
 
     public void testDuel() throws Exception {
-        int numQueries = scaledRandomIntBetween(32, 256);
-        for (int i = 0; i < numQueries; i++) {
-            String id = Integer.toString(i);
-            Query query;
-            if (randomBoolean()) {
-                query = new PrefixQuery(new Term("field", id));
-            } else if (randomBoolean()) {
-                query = new WildcardQuery(new Term("field", id + "*"));
-            } else if (randomBoolean()) {
-                query = new CustomQuery(new Term("field", id + "*"));
-            } else if (randomBoolean()) {
-                query = new SpanTermQuery(new Term("field", id));
-            } else {
-                query = new TermQuery(new Term("field", id));
-            }
-            addPercolatorQuery(id, query);
+        List<Function<String, Query>> queries = new ArrayList<>();
+        queries.add((id) -> new PrefixQuery(new Term("field", id)));
+        queries.add((id) -> new WildcardQuery(new Term("field", id + "*")));
+        queries.add((id) -> new CustomQuery(new Term("field", id)));
+        queries.add((id) -> new SpanTermQuery(new Term("field", id)));
+        queries.add((id) -> new TermQuery(new Term("field", id)));
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            if (randomBoolean()) {
+                builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            if (randomBoolean()) {
+                builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
+            builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            return builder.build();
+        });
+        queries.add((id) -> new MatchAllDocsQuery());
+        queries.add((id) -> new MatchNoDocsQuery("no reason at all"));
+
+        int numDocs = randomIntBetween(queries.size(), queries.size() * 3);
+        for (int i = 0; i < numDocs; i++) {
+            String id = Integer.toString(i);
+            addPercolatorQuery(id, queries.get(i % queries.size()).apply(id));
         }
 
         indexWriter.close();
         directoryReader = DirectoryReader.open(directory);
         IndexSearcher shardSearcher = newSearcher(directoryReader);
+        // Disable query cache, because ControlQuery cannot be cached...
+        shardSearcher.setQueryCache(null);
 
-        for (int i = 0; i < numQueries; i++) {
-            MemoryIndex memoryIndex = new MemoryIndex();
+        for (int i = 0; i < numDocs; i++) {
             String id = Integer.toString(i);
+            MemoryIndex memoryIndex = new MemoryIndex();
             memoryIndex.addField("field", id, new WhitespaceAnalyzer());
             duelRun(memoryIndex, shardSearcher);
         }
+
+        MemoryIndex memoryIndex = new MemoryIndex();
+        memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
+        duelRun(memoryIndex, shardSearcher);
+        // Empty percolator doc:
+        memoryIndex = new MemoryIndex();
+        duelRun(memoryIndex, shardSearcher);
     }
 
     public void testDuelSpecificQueries() throws Exception {
@@ -312,6 +381,8 @@ public class PercolateQueryTests extends ESTestCase {
         indexWriter.close();
         directoryReader = DirectoryReader.open(directory);
         IndexSearcher shardSearcher = newSearcher(directoryReader);
+        // Disable query cache, because ControlQuery cannot be cached...
+        shardSearcher.setQueryCache(null);
 
         MemoryIndex memoryIndex = new MemoryIndex();
         memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
@@ -332,33 +403,33 @@ public class PercolateQueryTests extends ESTestCase {
     }
 
     private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
+        boolean requireScore = randomBoolean();
         IndexSearcher percolateSearcher = memoryIndex.createSearcher();
-        PercolateQuery.Builder builder1 = new PercolateQuery.Builder(
+        PercolateQuery.Builder builder = new PercolateQuery.Builder(
                 "docType",
                 queryStore,
                 new BytesArray("{}"),
                 percolateSearcher
         );
         // enables the optimization that prevents queries from being evaluated that don't match
-        builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
-        TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10);
+        builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
+        Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build());
+        TopDocs topDocs = shardSearcher.search(query, 10);
 
-        PercolateQuery.Builder builder2 = new PercolateQuery.Builder(
-                "docType",
-                queryStore,
-                new BytesArray("{}"),
-                percolateSearcher
-        );
-        builder2.setPercolateTypeQuery(new MatchAllDocsQuery());
-        TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10);
-        assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits));
-        assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length));
-        for (int j = 0; j < topDocs1.scoreDocs.length; j++) {
-            assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc));
-            assertThat(topDocs1.scoreDocs[j].score, equalTo(topDocs2.scoreDocs[j].score));
-            Explanation explain1 = shardSearcher.explain(builder1.build(), topDocs1.scoreDocs[j].doc);
-            Explanation explain2 = shardSearcher.explain(builder2.build(), topDocs2.scoreDocs[j].doc);
-            assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
+        Query controlQuery = new ControlQuery(memoryIndex, queryStore);
+        controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
+        TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
+        assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
+        assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
+        for (int j = 0; j < topDocs.scoreDocs.length; j++) {
+            assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
+            assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
+            if (requireScore) {
+                Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
+                Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
+                assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
+                assertThat(explain1.getValue(), equalTo(explain2.getValue()));
+            }
         }
     }
@@ -391,4 +462,89 @@ public class PercolateQueryTests extends ESTestCase {
         }
     }
 
+    private final static class ControlQuery extends Query {
+
+        private final MemoryIndex memoryIndex;
+        private final PercolateQuery.QueryStore queryStore;
+
+        private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
+            this.memoryIndex = memoryIndex;
+            this.queryStore = queryStore;
+        }
+
+        @Override
+        public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
+            return new ConstantScoreWeight(this) {
+
+                float _score;
+
+                @Override
+                public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+                    Scorer scorer = scorer(context);
+                    if (scorer != null) {
+                        int result = scorer.iterator().advance(doc);
+                        if (result == doc) {
+                            return Explanation.match(scorer.score(), "ControlQuery");
+                        }
+                    }
+                    return Explanation.noMatch("ControlQuery");
+                }
+
+                @Override
+                public String toString() {
+                    return "weight(" + ControlQuery.this + ")";
+                }
+
+                @Override
+                public Scorer scorer(LeafReaderContext context) throws IOException {
+                    DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
+                    PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
+                    FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
+
+                        @Override
+                        protected boolean match(int doc) {
+                            try {
+                                Query query = leaf.getQuery(doc);
+                                float score = memoryIndex.search(query);
+                                if (score != 0f) {
+                                    if (needsScores) {
+                                        _score = score;
+                                    }
+                                    return true;
+                                } else {
+                                    return false;
+                                }
+                            } catch (IOException e) {
+                                throw new RuntimeException(e);
+                            }
+                        }
+                    };
+                    return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
+
+                        @Override
+                        public float score() throws IOException {
+                            return _score;
+                        }
+                    };
+                }
+            };
+        }
+
+        @Override
+        public String toString(String field) {
+            return "control{" + field + "}";
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            return sameClassAs(obj);
+        }
+
+        @Override
+        public int hashCode() {
+            return classHash();
+        }
+
+    }
+
 }
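
The duel above pits the optimized PercolateQuery against a brute-force ControlQuery that evaluates every stored query with the MemoryIndex, which is also why the query cache has to be disabled for it. The optimization this commit introduces can be summarized as a minimal sketch; the class and member names below are illustrative only, not the PercolateQuery internals:

// Minimal sketch of the skip-verification shortcut, under the assumption
// that a per-doc flag records whether extraction was EXTRACTION_COMPLETE.
// CandidateVerifier and its members are hypothetical names.
import java.util.function.IntPredicate;

final class CandidateVerifier {

    private final boolean needsScores;
    private final IntPredicate extractionComplete;   // per-doc: was the stored query's extraction complete?
    private final IntPredicate memoryIndexVerifier;  // per-doc: does the stored query match the MemoryIndex?

    CandidateVerifier(boolean needsScores, IntPredicate extractionComplete, IntPredicate memoryIndexVerifier) {
        this.needsScores = needsScores;
        this.extractionComplete = extractionComplete;
        this.memoryIndexVerifier = memoryIndexVerifier;
    }

    boolean isMatch(int candidateDocId) {
        if (needsScores == false && extractionComplete.test(candidateDocId)) {
            return true; // verified candidate: skip the expensive MemoryIndex round-trip
        }
        return memoryIndexVerifier.test(candidateDocId); // fall back to full verification
    }
}

When scores are required the shortcut does not apply, since a real score can only come from evaluating the query against the MemoryIndex; duelRun exercises both paths by randomizing requireScore.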
|
|
|
@ -49,6 +49,8 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
|
||||||
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
|
||||||
|
import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
|
||||||
import static org.hamcrest.Matchers.containsString;
|
import static org.hamcrest.Matchers.containsString;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.hamcrest.Matchers.instanceOf;
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
|
@ -71,7 +73,9 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
||||||
indexService = createIndex("test", Settings.EMPTY);
|
indexService = createIndex("test", Settings.EMPTY);
|
||||||
mapperService = indexService.mapperService();
|
mapperService = indexService.mapperService();
|
||||||
|
|
||||||
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
|
String mapper = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("_field_names").field("enabled", false).endObject() // makes testing easier
|
||||||
|
.startObject("properties")
|
||||||
.startObject("field").field("type", "text").endObject()
|
.startObject("field").field("type", "text").endObject()
|
||||||
.startObject("number_field").field("type", "long").endObject()
|
.startObject("number_field").field("type", "long").endObject()
|
||||||
.startObject("date_field").field("type", "date").endObject()
|
.startObject("date_field").field("type", "date").endObject()
|
||||||
|
@ -96,20 +100,21 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
||||||
.field(fieldName, queryBuilder)
|
.field(fieldName, queryBuilder)
|
||||||
.endObject().bytes());
|
.endObject().bytes());
|
||||||
|
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(0));
|
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
|
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
|
||||||
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
|
||||||
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
|
||||||
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
|
BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
|
||||||
assertQueryBuilder(qbSource, queryBuilder);
|
assertQueryBuilder(qbSource, queryBuilder);
|
||||||
|
|
||||||
// add an query for which we don't extract terms from
|
// add an query for which we don't extract terms from
|
||||||
queryBuilder = matchAllQuery();
|
queryBuilder = rangeQuery("field").from("a").to("z");
|
||||||
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
|
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
|
||||||
.field(fieldName, queryBuilder)
|
.field(fieldName, queryBuilder)
|
||||||
.endObject().bytes());
|
.endObject().bytes());
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(1));
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName())[0].binaryValue(), equalTo(new BytesRef()));
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED));
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
|
assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
|
||||||
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
|
assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
|
||||||
qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
|
qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
|
||||||
@@ -195,6 +200,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
     public void testMultiplePercolatorFields() throws Exception {
         String typeName = "another_type";
         String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
+                .startObject("_field_names").field("enabled", false).endObject() // makes testing easier
                 .startObject("properties")
                     .startObject("query_field1").field("type", "percolator").endObject()
                     .startObject("query_field2").field("type", "percolator").endObject()
@@ -209,7 +215,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                 .field("query_field2", queryBuilder)
                 .endObject().bytes()
         );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(22)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes _uid (1), type (2), source (1)
         BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);

@@ -221,6 +227,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
     public void testNestedPercolatorField() throws Exception {
         String typeName = "another_type";
         String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
+                .startObject("_field_names").field("enabled", false).endObject() // makes testing easier
                 .startObject("properties")
                     .startObject("object_field")
                         .field("type", "object")
@@ -238,7 +245,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                 .field("query_field", queryBuilder)
                 .endObject().endObject().bytes()
         );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
         BytesRef queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
@@ -249,7 +256,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                 .endArray()
                 .endObject().bytes()
         );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
         queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);

@@ -397,12 +397,16 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
                 .addMapping("employee", mapping)
                 .addMapping("queries", "query", "type=percolator")
         );
-        client().prepareIndex("test", "queries", "q").setSource(jsonBuilder().startObject()
+        client().prepareIndex("test", "queries", "q1").setSource(jsonBuilder().startObject()
                 .field("query", QueryBuilders.nestedQuery("employee",
                         QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
                 ).endObject())
-                .setRefreshPolicy(IMMEDIATE)
                 .get();
+        // this query should never match as it doesn't use a nested query:
+        client().prepareIndex("test", "queries", "q2").setSource(jsonBuilder().startObject()
+                .field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
+                .get();
+        client().admin().indices().prepareRefresh().get();

         SearchResponse response = client().prepareSearch()
                 .setQuery(new PercolateQueryBuilder("query", "employee",
@@ -413,9 +417,10 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
                         .startObject().field("name", "tony stark").endObject()
                         .endArray()
                         .endObject().bytes()))
+                .addSort("_doc", SortOrder.ASC)
                 .get();
         assertHitCount(response, 1);
-        assertThat(response.getHits().getAt(0).getId(), equalTo("q"));
+        assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));

         response = client().prepareSearch()
                 .setQuery(new PercolateQueryBuilder("query", "employee",
@@ -426,12 +431,14 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
                         .startObject().field("name", "tony stark").endObject()
                         .endArray()
                         .endObject().bytes()))
+                .addSort("_doc", SortOrder.ASC)
                 .get();
         assertHitCount(response, 0);

         response = client().prepareSearch()
                 .setQuery(new PercolateQueryBuilder("query", "employee",
                         XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject().bytes()))
+                .addSort("_doc", SortOrder.ASC)
                 .get();
         assertHitCount(response, 0);
     }