Improved percolator's random candidate query duel test and
fixed bugs that were exposed by this: * Duplicates query leafs were not detected in a multi level boolean query * Tracking fields for numeric range queries did not work properly. * The sorting that was used to find the less restrictive clauses in disjunction query did not work too.
This commit is contained in:
parent
bc8b3fc71c
commit
bcfb7ab591
|
@ -54,6 +54,7 @@ import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -61,6 +62,7 @@ import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.function.BiFunction;
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static java.util.stream.Collectors.toSet;
|
import static java.util.stream.Collectors.toSet;
|
||||||
|
|
||||||
|
@ -366,36 +368,38 @@ final class QueryAnalyzer {
|
||||||
Set<QueryExtraction> extractions = new HashSet<>();
|
Set<QueryExtraction> extractions = new HashSet<>();
|
||||||
Set<String> seenRangeFields = new HashSet<>();
|
Set<String> seenRangeFields = new HashSet<>();
|
||||||
for (Result result : results) {
|
for (Result result : results) {
|
||||||
QueryExtraction[] t = result.extractions.toArray(new QueryExtraction[1]);
|
// In case that there are duplicate query extractions we need to be careful with incrementing msm,
|
||||||
if (result.extractions.size() == 1 && t[0].range != null) {
|
// because that could lead to valid matches not becoming candidate matches:
|
||||||
// In case of range queries each extraction does not simply increment the minimum_should_match
|
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
|
||||||
// for that percolator query like for a term based extraction, so that can lead to more false
|
// doc: field: val1 val2 val3
|
||||||
// positives for percolator queries with range queries than term based queries.
|
// So lets be protective and decrease the msm:
|
||||||
// The is because the way number fields are extracted from the document to be percolated.
|
int resultMsm = result.minimumShouldMatch;
|
||||||
// Per field a single range is extracted and if a percolator query has two or more range queries
|
for (QueryExtraction queryExtraction : result.extractions) {
|
||||||
// on the same field, then the minimum should match can be higher than clauses in the CoveringQuery.
|
if (queryExtraction.range != null) {
|
||||||
// Therefore right now the minimum should match is incremented once per number field when processing
|
// In case of range queries each extraction does not simply increment the minimum_should_match
|
||||||
// the percolator query at index time.
|
// for that percolator query like for a term based extraction, so that can lead to more false
|
||||||
if (seenRangeFields.add(t[0].range.fieldName)) {
|
// positives for percolator queries with range queries than term based queries.
|
||||||
msm += 1;
|
// The is because the way number fields are extracted from the document to be percolated.
|
||||||
}
|
// Per field a single range is extracted and if a percolator query has two or more range queries
|
||||||
} else {
|
// on the same field, then the minimum should match can be higher than clauses in the CoveringQuery.
|
||||||
// In case that there are duplicate query extractions we need to be careful with incrementing msm,
|
// Therefore right now the minimum should match is incremented once per number field when processing
|
||||||
// because that could lead to valid matches not becoming candidate matches:
|
// the percolator query at index time.
|
||||||
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
|
if (seenRangeFields.add(queryExtraction.range.fieldName)) {
|
||||||
// doc: field: val1 val2 val3
|
resultMsm = 1;
|
||||||
// So lets be protective and decrease the msm:
|
} else {
|
||||||
int resultMsm = result.minimumShouldMatch;
|
resultMsm = 0;
|
||||||
for (QueryExtraction queryExtraction : result.extractions) {
|
|
||||||
if (extractions.contains(queryExtraction)) {
|
|
||||||
// To protect against negative msm:
|
|
||||||
// (sub results could consist out of disjunction and conjunction and
|
|
||||||
// then we do not know which extraction contributed to msm)
|
|
||||||
resultMsm = Math.max(0, resultMsm - 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
msm += resultMsm;
|
|
||||||
|
if (extractions.contains(queryExtraction)) {
|
||||||
|
// To protect against negative msm:
|
||||||
|
// (sub results could consist out of disjunction and conjunction and
|
||||||
|
// then we do not know which extraction contributed to msm)
|
||||||
|
resultMsm = Math.max(0, resultMsm - 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
msm += resultMsm;
|
||||||
|
|
||||||
verified &= result.verified;
|
verified &= result.verified;
|
||||||
matchAllDocs &= result.matchAllDocs;
|
matchAllDocs &= result.matchAllDocs;
|
||||||
extractions.addAll(result.extractions);
|
extractions.addAll(result.extractions);
|
||||||
|
@ -518,8 +522,7 @@ final class QueryAnalyzer {
|
||||||
private static Result handleDisjunction(List<Query> disjunctions, int requiredShouldClauses, boolean otherClauses,
|
private static Result handleDisjunction(List<Query> disjunctions, int requiredShouldClauses, boolean otherClauses,
|
||||||
Version version) {
|
Version version) {
|
||||||
// Keep track of the msm for each clause:
|
// Keep track of the msm for each clause:
|
||||||
int[] msmPerClause = new int[disjunctions.size()];
|
List<DisjunctionClause> clauses = new ArrayList<>(disjunctions.size());
|
||||||
String[] rangeFieldNames = new String[disjunctions.size()];
|
|
||||||
boolean verified = otherClauses == false;
|
boolean verified = otherClauses == false;
|
||||||
if (version.before(Version.V_6_1_0)) {
|
if (version.before(Version.V_6_1_0)) {
|
||||||
verified &= requiredShouldClauses <= 1;
|
verified &= requiredShouldClauses <= 1;
|
||||||
|
@ -535,17 +538,14 @@ final class QueryAnalyzer {
|
||||||
}
|
}
|
||||||
int resultMsm = subResult.minimumShouldMatch;
|
int resultMsm = subResult.minimumShouldMatch;
|
||||||
for (QueryExtraction extraction : subResult.extractions) {
|
for (QueryExtraction extraction : subResult.extractions) {
|
||||||
if (terms.contains(extraction)) {
|
if (terms.add(extraction) == false) {
|
||||||
resultMsm = Math.max(1, resultMsm - 1);
|
resultMsm = Math.max(0, resultMsm - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
msmPerClause[i] = resultMsm;
|
clauses.add(new DisjunctionClause(resultMsm, subResult.extractions.stream()
|
||||||
terms.addAll(subResult.extractions);
|
.filter(extraction -> extraction.range != null)
|
||||||
|
.map(extraction -> extraction.range.fieldName)
|
||||||
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
|
.collect(toSet())));
|
||||||
if (subResult.extractions.size() == 1 && t[0].range != null) {
|
|
||||||
rangeFieldNames[i] = t[0].range.fieldName;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
|
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
|
||||||
|
|
||||||
|
@ -554,15 +554,20 @@ final class QueryAnalyzer {
|
||||||
Set<String> seenRangeFields = new HashSet<>();
|
Set<String> seenRangeFields = new HashSet<>();
|
||||||
// Figure out what the combined msm is for this disjunction:
|
// Figure out what the combined msm is for this disjunction:
|
||||||
// (sum the lowest required clauses, otherwise we're too strict and queries may not match)
|
// (sum the lowest required clauses, otherwise we're too strict and queries may not match)
|
||||||
Arrays.sort(msmPerClause);
|
clauses = clauses.stream()
|
||||||
int limit = Math.min(msmPerClause.length, Math.max(1, requiredShouldClauses));
|
.filter(o -> o.msm > 0)
|
||||||
|
.sorted(Comparator.comparingInt(o -> o.msm))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
int limit = Math.min(clauses.size(), Math.max(1, requiredShouldClauses));
|
||||||
for (int i = 0; i < limit; i++) {
|
for (int i = 0; i < limit; i++) {
|
||||||
if (rangeFieldNames[i] != null) {
|
if (clauses.get(i).rangeFieldNames.isEmpty() == false) {
|
||||||
if (seenRangeFields.add(rangeFieldNames[i])) {
|
for (String rangeField: clauses.get(i).rangeFieldNames) {
|
||||||
msm += 1;
|
if (seenRangeFields.add(rangeField)) {
|
||||||
|
msm += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
msm += msmPerClause[i];
|
msm += clauses.get(i).msm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -575,6 +580,17 @@ final class QueryAnalyzer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static class DisjunctionClause {
|
||||||
|
|
||||||
|
final int msm;
|
||||||
|
final Set<String> rangeFieldNames;
|
||||||
|
|
||||||
|
DisjunctionClause(int msm, Set<String> rangeFieldNames) {
|
||||||
|
this.msm = msm;
|
||||||
|
this.rangeFieldNames = rangeFieldNames;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
|
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
|
||||||
assert extractions1 != null || extractions2 != null;
|
assert extractions1 != null || extractions2 != null;
|
||||||
if (extractions1 == null) {
|
if (extractions1 == null) {
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.document.HalfFloatPoint;
|
||||||
import org.apache.lucene.document.InetAddressPoint;
|
import org.apache.lucene.document.InetAddressPoint;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
|
import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
@ -36,7 +37,9 @@ import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
import org.apache.lucene.index.NoMergePolicy;
|
import org.apache.lucene.index.NoMergePolicy;
|
||||||
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.memory.MemoryIndex;
|
import org.apache.lucene.index.memory.MemoryIndex;
|
||||||
import org.apache.lucene.queries.BlendedTermQuery;
|
import org.apache.lucene.queries.BlendedTermQuery;
|
||||||
|
@ -44,12 +47,10 @@ import org.apache.lucene.queries.CommonTermsQuery;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreScorer;
|
|
||||||
import org.apache.lucene.search.CoveringQuery;
|
import org.apache.lucene.search.CoveringQuery;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
import org.apache.lucene.search.FilterScorer;
|
|
||||||
import org.apache.lucene.search.FilteredDocIdSetIterator;
|
import org.apache.lucene.search.FilteredDocIdSetIterator;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
@ -76,11 +77,14 @@ import org.elasticsearch.common.CheckedFunction;
|
||||||
import org.elasticsearch.common.bytes.BytesArray;
|
import org.elasticsearch.common.bytes.BytesArray;
|
||||||
import org.elasticsearch.common.bytes.BytesReference;
|
import org.elasticsearch.common.bytes.BytesReference;
|
||||||
import org.elasticsearch.common.compress.CompressedXContent;
|
import org.elasticsearch.common.compress.CompressedXContent;
|
||||||
|
import org.elasticsearch.common.geo.ShapeRelation;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||||
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
|
import org.elasticsearch.index.mapper.NumberFieldMapper;
|
||||||
import org.elasticsearch.index.mapper.ParseContext;
|
import org.elasticsearch.index.mapper.ParseContext;
|
||||||
import org.elasticsearch.plugins.Plugin;
|
import org.elasticsearch.plugins.Plugin;
|
||||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
|
@ -166,59 +170,53 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
public void testDuel() throws Exception {
|
public void testDuel() throws Exception {
|
||||||
int numFields = randomIntBetween(1, 3);
|
int numFields = randomIntBetween(1, 3);
|
||||||
Map<String, List<String>> content = new HashMap<>();
|
Map<String, List<String>> stringContent = new HashMap<>();
|
||||||
for (int i = 0; i < numFields; i++) {
|
for (int i = 0; i < numFields; i++) {
|
||||||
int numTokens = randomIntBetween(1, 64);
|
int numTokens = randomIntBetween(1, 64);
|
||||||
List<String> values = new ArrayList<>();
|
List<String> values = new ArrayList<>();
|
||||||
for (int j = 0; j < numTokens; j++) {
|
for (int j = 0; j < numTokens; j++) {
|
||||||
values.add(randomAlphaOfLength(8));
|
values.add(randomAlphaOfLength(8));
|
||||||
}
|
}
|
||||||
content.put("field" + i, values);
|
stringContent.put("field" + i, values);
|
||||||
}
|
}
|
||||||
List<String> fields = new ArrayList<>(content.keySet());
|
List<String> stringFields = new ArrayList<>(stringContent.keySet());
|
||||||
|
|
||||||
|
int numValues = randomIntBetween(16, 64);
|
||||||
|
List<Integer> intValues = new ArrayList<>(numValues);
|
||||||
|
for (int j = 0; j < numValues; j++) {
|
||||||
|
intValues.add(randomInt());
|
||||||
|
}
|
||||||
|
Collections.sort(intValues);
|
||||||
|
|
||||||
|
MappedFieldType intFieldType = mapperService.documentMapper("type").mappers()
|
||||||
|
.getMapper("int_field").fieldType();
|
||||||
|
|
||||||
List<Supplier<Query>> queryFunctions = new ArrayList<>();
|
List<Supplier<Query>> queryFunctions = new ArrayList<>();
|
||||||
queryFunctions.add(MatchNoDocsQuery::new);
|
queryFunctions.add(MatchNoDocsQuery::new);
|
||||||
queryFunctions.add(MatchAllDocsQuery::new);
|
queryFunctions.add(MatchAllDocsQuery::new);
|
||||||
queryFunctions.add(() -> new TermQuery(new Term("unknown_field", "value")));
|
queryFunctions.add(() -> new TermQuery(new Term("unknown_field", "value")));
|
||||||
String field1 = randomFrom(fields);
|
String field1 = randomFrom(stringFields);
|
||||||
queryFunctions.add(() -> new TermQuery(new Term(field1, randomFrom(content.get(field1)))));
|
queryFunctions.add(() -> new TermQuery(new Term(field1, randomFrom(stringContent.get(field1)))));
|
||||||
String field2 = randomFrom(fields);
|
String field2 = randomFrom(stringFields);
|
||||||
queryFunctions.add(() -> new TermQuery(new Term(field2, randomFrom(content.get(field2)))));
|
queryFunctions.add(() -> new TermQuery(new Term(field2, randomFrom(stringContent.get(field2)))));
|
||||||
queryFunctions.add(() -> new TermInSetQuery(field1, new BytesRef(randomFrom(content.get(field1))),
|
queryFunctions.add(() -> intFieldType.termQuery(randomFrom(intValues), null));
|
||||||
new BytesRef(randomFrom(content.get(field1)))));
|
queryFunctions.add(() -> intFieldType.termsQuery(Arrays.asList(randomFrom(intValues), randomFrom(intValues)), null));
|
||||||
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(content.get(field1))),
|
queryFunctions.add(() -> intFieldType.rangeQuery(intValues.get(4), intValues.get(intValues.size() - 4), true,
|
||||||
new BytesRef(randomFrom(content.get(field1)))));
|
true, ShapeRelation.WITHIN, null, null, null));
|
||||||
queryFunctions.add(() -> {
|
queryFunctions.add(() -> new TermInSetQuery(field1, new BytesRef(randomFrom(stringContent.get(field1))),
|
||||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
new BytesRef(randomFrom(stringContent.get(field1)))));
|
||||||
int numClauses = randomIntBetween(1, 16);
|
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))),
|
||||||
for (int i = 0; i < numClauses; i++) {
|
new BytesRef(randomFrom(stringContent.get(field1)))));
|
||||||
if (rarely()) {
|
int numRandomBoolQueries = randomIntBetween(16, 32);
|
||||||
if (randomBoolean()) {
|
for (int i = 0; i < numRandomBoolQueries; i++) {
|
||||||
Occur occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
|
queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues));
|
||||||
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), occur);
|
}
|
||||||
} else {
|
|
||||||
String field = randomFrom(fields);
|
|
||||||
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), Occur.MUST_NOT);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (randomBoolean()) {
|
|
||||||
Occur occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
|
|
||||||
String field = randomFrom(fields);
|
|
||||||
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur);
|
|
||||||
} else {
|
|
||||||
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), Occur.MUST_NOT);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return builder.build();
|
|
||||||
});
|
|
||||||
queryFunctions.add(() -> {
|
queryFunctions.add(() -> {
|
||||||
int numClauses = randomIntBetween(1, 16);
|
int numClauses = randomIntBetween(1, 16);
|
||||||
List<Query> clauses = new ArrayList<>();
|
List<Query> clauses = new ArrayList<>();
|
||||||
for (int i = 0; i < numClauses; i++) {
|
for (int i = 0; i < numClauses; i++) {
|
||||||
String field = randomFrom(fields);
|
String field = randomFrom(stringFields);
|
||||||
clauses.add(new TermQuery(new Term(field, randomFrom(content.get(field)))));
|
clauses.add(new TermQuery(new Term(field, randomFrom(stringContent.get(field)))));
|
||||||
}
|
}
|
||||||
return new DisjunctionMaxQuery(clauses, 0.01f);
|
return new DisjunctionMaxQuery(clauses, 0.01f);
|
||||||
});
|
});
|
||||||
|
@ -237,14 +235,75 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
shardSearcher.setQueryCache(null);
|
shardSearcher.setQueryCache(null);
|
||||||
|
|
||||||
Document document = new Document();
|
Document document = new Document();
|
||||||
for (Map.Entry<String, List<String>> entry : content.entrySet()) {
|
for (Map.Entry<String, List<String>> entry : stringContent.entrySet()) {
|
||||||
String value = entry.getValue().stream().collect(Collectors.joining(" "));
|
String value = entry.getValue().stream().collect(Collectors.joining(" "));
|
||||||
document.add(new TextField(entry.getKey(), value, Field.Store.NO));
|
document.add(new TextField(entry.getKey(), value, Field.Store.NO));
|
||||||
}
|
}
|
||||||
|
for (Integer intValue : intValues) {
|
||||||
|
List<Field> numberFields =
|
||||||
|
NumberFieldMapper.NumberType.INTEGER.createFields("int_field", intValue, true, true, false);
|
||||||
|
for (Field numberField : numberFields) {
|
||||||
|
document.add(numberField);
|
||||||
|
}
|
||||||
|
}
|
||||||
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
|
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
|
||||||
duelRun(queryStore, memoryIndex, shardSearcher);
|
duelRun(queryStore, memoryIndex, shardSearcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content,
|
||||||
|
MappedFieldType intFieldType, List<Integer> intValues) {
|
||||||
|
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||||
|
int numClauses = randomIntBetween(1, 16);
|
||||||
|
int numShouldClauses = 0;
|
||||||
|
boolean onlyShouldClauses = rarely();
|
||||||
|
for (int i = 0; i < numClauses; i++) {
|
||||||
|
Occur occur;
|
||||||
|
if (onlyShouldClauses) {
|
||||||
|
occur = Occur.SHOULD;
|
||||||
|
if (randomBoolean()) {
|
||||||
|
String field = randomFrom(fields);
|
||||||
|
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur);
|
||||||
|
} else {
|
||||||
|
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur);
|
||||||
|
}
|
||||||
|
} else if (rarely() && depth <= 3) {
|
||||||
|
occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
|
||||||
|
builder.add(createRandomBooleanQuery(depth + 1, fields, content, intFieldType, intValues), occur);
|
||||||
|
} else if (rarely()) {
|
||||||
|
if (randomBoolean()) {
|
||||||
|
occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
|
||||||
|
if (randomBoolean()) {
|
||||||
|
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), occur);
|
||||||
|
} else {
|
||||||
|
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur);
|
||||||
|
}
|
||||||
|
} else if (randomBoolean()) {
|
||||||
|
String field = randomFrom(fields);
|
||||||
|
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur = Occur.MUST_NOT);
|
||||||
|
} else {
|
||||||
|
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur = Occur.MUST_NOT);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (randomBoolean()) {
|
||||||
|
occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
|
||||||
|
if (randomBoolean()) {
|
||||||
|
String field = randomFrom(fields);
|
||||||
|
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur);
|
||||||
|
} else {
|
||||||
|
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), occur = Occur.MUST_NOT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (occur == Occur.SHOULD) {
|
||||||
|
numShouldClauses++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
builder.setMinimumNumberShouldMatch(numShouldClauses);
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
public void testDuelIdBased() throws Exception {
|
public void testDuelIdBased() throws Exception {
|
||||||
List<Function<String, Query>> queryFunctions = new ArrayList<>();
|
List<Function<String, Query>> queryFunctions = new ArrayList<>();
|
||||||
queryFunctions.add((id) -> new PrefixQuery(new Term("field", id)));
|
queryFunctions.add((id) -> new PrefixQuery(new Term("field", id)));
|
||||||
|
@ -766,11 +825,11 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
Query percolateQuery = fieldType.percolateQuery("_name", queryStore,
|
Query percolateQuery = fieldType.percolateQuery("_name", queryStore,
|
||||||
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
|
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
|
||||||
Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery);
|
Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery);
|
||||||
TopDocs topDocs = shardSearcher.search(query, 10);
|
TopDocs topDocs = shardSearcher.search(query, 100);
|
||||||
|
|
||||||
Query controlQuery = new ControlQuery(memoryIndex, queryStore);
|
Query controlQuery = new ControlQuery(memoryIndex, queryStore);
|
||||||
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
|
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
|
||||||
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
|
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 100);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
|
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
|
||||||
|
@ -793,22 +852,39 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
logger.error("controlTopDocs.scoreDocs.length={}", controlTopDocs.scoreDocs.length);
|
logger.error("controlTopDocs.scoreDocs.length={}", controlTopDocs.scoreDocs.length);
|
||||||
|
|
||||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||||
logger.error("topDocs.scoreDocs[j].doc={}", topDocs.scoreDocs[i].doc);
|
logger.error("topDocs.scoreDocs[{}].doc={}", i, topDocs.scoreDocs[i].doc);
|
||||||
logger.error("topDocs.scoreDocs[j].score={}", topDocs.scoreDocs[i].score);
|
logger.error("topDocs.scoreDocs[{}].score={}", i, topDocs.scoreDocs[i].score);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < controlTopDocs.scoreDocs.length; i++) {
|
for (int i = 0; i < controlTopDocs.scoreDocs.length; i++) {
|
||||||
logger.error("controlTopDocs.scoreDocs[j].doc={}", controlTopDocs.scoreDocs[i].doc);
|
logger.error("controlTopDocs.scoreDocs[{}].doc={}", i, controlTopDocs.scoreDocs[i].doc);
|
||||||
logger.error("controlTopDocs.scoreDocs[j].score={}", controlTopDocs.scoreDocs[i].score);
|
logger.error("controlTopDocs.scoreDocs[{}].score={}", i, controlTopDocs.scoreDocs[i].score);
|
||||||
|
|
||||||
|
// Additional stored information that is useful when debugging:
|
||||||
|
String queryToString = shardSearcher.doc(controlTopDocs.scoreDocs[i].doc).get("query_to_string");
|
||||||
|
logger.error("topDocs.scoreDocs[{}].query_to_string={}", i, queryToString);
|
||||||
|
|
||||||
|
NumericDocValues numericValues =
|
||||||
|
MultiDocValues.getNumericValues(shardSearcher.getIndexReader(), fieldType.minimumShouldMatchField.name());
|
||||||
|
boolean exact = numericValues.advanceExact(controlTopDocs.scoreDocs[i].doc);
|
||||||
|
if (exact) {
|
||||||
|
logger.error("controlTopDocs.scoreDocs[{}].minimum_should_match_field={}", i, numericValues.longValue());
|
||||||
|
} else {
|
||||||
|
// Some queries do not have a msm field. (e.g. unsupported queries)
|
||||||
|
logger.error("controlTopDocs.scoreDocs[{}].minimum_should_match_field=[NO_VALUE]", i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
throw ae;
|
throw ae;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addQuery(Query query, List<ParseContext.Document> docs) throws IOException {
|
private void addQuery(Query query, List<ParseContext.Document> docs) {
|
||||||
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
|
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
|
||||||
mapperService.documentMapperParser(), documentMapper, null, null);
|
mapperService.documentMapperParser(), documentMapper, null, null);
|
||||||
fieldMapper.processQuery(query, parseContext);
|
fieldMapper.processQuery(query, parseContext);
|
||||||
docs.add(parseContext.doc());
|
ParseContext.Document queryDocument = parseContext.doc();
|
||||||
|
// Add to string representation of the query to make debugging easier:
|
||||||
|
queryDocument.add(new StoredField("query_to_string", query.toString()));
|
||||||
|
docs.add(queryDocument);
|
||||||
queries.add(query);
|
queries.add(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -865,8 +941,6 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
final IndexSearcher percolatorIndexSearcher = memoryIndex.createSearcher();
|
final IndexSearcher percolatorIndexSearcher = memoryIndex.createSearcher();
|
||||||
return new Weight(this) {
|
return new Weight(this) {
|
||||||
|
|
||||||
float _score;
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void extractTerms(Set<Term> terms) {}
|
public void extractTerms(Set<Term> terms) {}
|
||||||
|
|
||||||
|
@ -889,6 +963,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
|
float _score[] = new float[]{boost};
|
||||||
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
|
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
|
||||||
CheckedFunction<Integer, Query, IOException> leaf = queryStore.getQueries(context);
|
CheckedFunction<Integer, Query, IOException> leaf = queryStore.getQueries(context);
|
||||||
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
|
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
|
||||||
|
@ -900,7 +975,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
TopDocs topDocs = percolatorIndexSearcher.search(query, 1);
|
TopDocs topDocs = percolatorIndexSearcher.search(query, 1);
|
||||||
if (topDocs.totalHits > 0) {
|
if (topDocs.totalHits > 0) {
|
||||||
if (needsScores) {
|
if (needsScores) {
|
||||||
_score = topDocs.scoreDocs[0].score;
|
_score[0] = topDocs.scoreDocs[0].score;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -911,11 +986,21 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
return new FilterScorer(new ConstantScoreScorer(this, 1f, memoryIndexIterator)) {
|
return new Scorer(this) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return memoryIndexIterator.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return memoryIndexIterator;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float score() throws IOException {
|
||||||
return _score;
|
return _score[0];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -922,7 +922,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
||||||
assertThat(values.get(3), equalTo("field\0value4"));
|
assertThat(values.get(3), equalTo("field\0value4"));
|
||||||
assertThat(values.get(4), equalTo("field\0value5"));
|
assertThat(values.get(4), equalTo("field\0value5"));
|
||||||
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
|
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
|
||||||
assertThat(msm, equalTo(3));
|
assertThat(msm, equalTo(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static byte[] subByteArray(byte[] source, int offset, int length) {
|
private static byte[] subByteArray(byte[] source, int offset, int length) {
|
||||||
|
|
Loading…
Reference in New Issue