Improved percolator's random candidate query duel test and

fixed bugs that were exposed by this:

* Duplicates query leafs were not detected in a multi level boolean query
* Tracking fields for numeric range queries did not work properly.
* The sorting that was used to find the less restrictive clauses in
  disjunction query did not work too.
This commit is contained in:
Martijn van Groningen 2018-02-26 16:03:38 +01:00
parent bc8b3fc71c
commit bcfb7ab591
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
3 changed files with 199 additions and 98 deletions

View File

@ -54,6 +54,7 @@ import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -61,6 +62,7 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.function.BiFunction; import java.util.function.BiFunction;
import java.util.stream.Collectors;
import static java.util.stream.Collectors.toSet; import static java.util.stream.Collectors.toSet;
@ -366,36 +368,38 @@ final class QueryAnalyzer {
Set<QueryExtraction> extractions = new HashSet<>(); Set<QueryExtraction> extractions = new HashSet<>();
Set<String> seenRangeFields = new HashSet<>(); Set<String> seenRangeFields = new HashSet<>();
for (Result result : results) { for (Result result : results) {
QueryExtraction[] t = result.extractions.toArray(new QueryExtraction[1]); // In case that there are duplicate query extractions we need to be careful with incrementing msm,
if (result.extractions.size() == 1 && t[0].range != null) { // because that could lead to valid matches not becoming candidate matches:
// In case of range queries each extraction does not simply increment the minimum_should_match // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
// for that percolator query like for a term based extraction, so that can lead to more false // doc: field: val1 val2 val3
// positives for percolator queries with range queries than term based queries. // So lets be protective and decrease the msm:
// The is because the way number fields are extracted from the document to be percolated. int resultMsm = result.minimumShouldMatch;
// Per field a single range is extracted and if a percolator query has two or more range queries for (QueryExtraction queryExtraction : result.extractions) {
// on the same field, then the minimum should match can be higher than clauses in the CoveringQuery. if (queryExtraction.range != null) {
// Therefore right now the minimum should match is incremented once per number field when processing // In case of range queries each extraction does not simply increment the minimum_should_match
// the percolator query at index time. // for that percolator query like for a term based extraction, so that can lead to more false
if (seenRangeFields.add(t[0].range.fieldName)) { // positives for percolator queries with range queries than term based queries.
msm += 1; // The is because the way number fields are extracted from the document to be percolated.
} // Per field a single range is extracted and if a percolator query has two or more range queries
} else { // on the same field, then the minimum should match can be higher than clauses in the CoveringQuery.
// In case that there are duplicate query extractions we need to be careful with incrementing msm, // Therefore right now the minimum should match is incremented once per number field when processing
// because that could lead to valid matches not becoming candidate matches: // the percolator query at index time.
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) if (seenRangeFields.add(queryExtraction.range.fieldName)) {
// doc: field: val1 val2 val3 resultMsm = 1;
// So lets be protective and decrease the msm: } else {
int resultMsm = result.minimumShouldMatch; resultMsm = 0;
for (QueryExtraction queryExtraction : result.extractions) {
if (extractions.contains(queryExtraction)) {
// To protect against negative msm:
// (sub results could consist out of disjunction and conjunction and
// then we do not know which extraction contributed to msm)
resultMsm = Math.max(0, resultMsm - 1);
} }
} }
msm += resultMsm;
if (extractions.contains(queryExtraction)) {
// To protect against negative msm:
// (sub results could consist out of disjunction and conjunction and
// then we do not know which extraction contributed to msm)
resultMsm = Math.max(0, resultMsm - 1);
}
} }
msm += resultMsm;
verified &= result.verified; verified &= result.verified;
matchAllDocs &= result.matchAllDocs; matchAllDocs &= result.matchAllDocs;
extractions.addAll(result.extractions); extractions.addAll(result.extractions);
@ -518,8 +522,7 @@ final class QueryAnalyzer {
private static Result handleDisjunction(List<Query> disjunctions, int requiredShouldClauses, boolean otherClauses, private static Result handleDisjunction(List<Query> disjunctions, int requiredShouldClauses, boolean otherClauses,
Version version) { Version version) {
// Keep track of the msm for each clause: // Keep track of the msm for each clause:
int[] msmPerClause = new int[disjunctions.size()]; List<DisjunctionClause> clauses = new ArrayList<>(disjunctions.size());
String[] rangeFieldNames = new String[disjunctions.size()];
boolean verified = otherClauses == false; boolean verified = otherClauses == false;
if (version.before(Version.V_6_1_0)) { if (version.before(Version.V_6_1_0)) {
verified &= requiredShouldClauses <= 1; verified &= requiredShouldClauses <= 1;
@ -535,17 +538,14 @@ final class QueryAnalyzer {
} }
int resultMsm = subResult.minimumShouldMatch; int resultMsm = subResult.minimumShouldMatch;
for (QueryExtraction extraction : subResult.extractions) { for (QueryExtraction extraction : subResult.extractions) {
if (terms.contains(extraction)) { if (terms.add(extraction) == false) {
resultMsm = Math.max(1, resultMsm - 1); resultMsm = Math.max(0, resultMsm - 1);
} }
} }
msmPerClause[i] = resultMsm; clauses.add(new DisjunctionClause(resultMsm, subResult.extractions.stream()
terms.addAll(subResult.extractions); .filter(extraction -> extraction.range != null)
.map(extraction -> extraction.range.fieldName)
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]); .collect(toSet())));
if (subResult.extractions.size() == 1 && t[0].range != null) {
rangeFieldNames[i] = t[0].range.fieldName;
}
} }
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses; boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
@ -554,15 +554,20 @@ final class QueryAnalyzer {
Set<String> seenRangeFields = new HashSet<>(); Set<String> seenRangeFields = new HashSet<>();
// Figure out what the combined msm is for this disjunction: // Figure out what the combined msm is for this disjunction:
// (sum the lowest required clauses, otherwise we're too strict and queries may not match) // (sum the lowest required clauses, otherwise we're too strict and queries may not match)
Arrays.sort(msmPerClause); clauses = clauses.stream()
int limit = Math.min(msmPerClause.length, Math.max(1, requiredShouldClauses)); .filter(o -> o.msm > 0)
.sorted(Comparator.comparingInt(o -> o.msm))
.collect(Collectors.toList());
int limit = Math.min(clauses.size(), Math.max(1, requiredShouldClauses));
for (int i = 0; i < limit; i++) { for (int i = 0; i < limit; i++) {
if (rangeFieldNames[i] != null) { if (clauses.get(i).rangeFieldNames.isEmpty() == false) {
if (seenRangeFields.add(rangeFieldNames[i])) { for (String rangeField: clauses.get(i).rangeFieldNames) {
msm += 1; if (seenRangeFields.add(rangeField)) {
msm += 1;
}
} }
} else { } else {
msm += msmPerClause[i]; msm += clauses.get(i).msm;
} }
} }
} else { } else {
@ -575,6 +580,17 @@ final class QueryAnalyzer {
} }
} }
static class DisjunctionClause {
final int msm;
final Set<String> rangeFieldNames;
DisjunctionClause(int msm, Set<String> rangeFieldNames) {
this.msm = msm;
this.rangeFieldNames = rangeFieldNames;
}
}
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) { static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
assert extractions1 != null || extractions2 != null; assert extractions1 != null || extractions2 != null;
if (extractions1 == null) { if (extractions1 == null) {

View File

@ -28,6 +28,7 @@ import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
@ -36,7 +37,9 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.queries.BlendedTermQuery;
@ -44,12 +47,10 @@ import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.CoveringQuery; import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.FilteredDocIdSetIterator; import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
@ -76,11 +77,14 @@ import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.ESSingleNodeTestCase;
@ -166,59 +170,53 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
public void testDuel() throws Exception { public void testDuel() throws Exception {
int numFields = randomIntBetween(1, 3); int numFields = randomIntBetween(1, 3);
Map<String, List<String>> content = new HashMap<>(); Map<String, List<String>> stringContent = new HashMap<>();
for (int i = 0; i < numFields; i++) { for (int i = 0; i < numFields; i++) {
int numTokens = randomIntBetween(1, 64); int numTokens = randomIntBetween(1, 64);
List<String> values = new ArrayList<>(); List<String> values = new ArrayList<>();
for (int j = 0; j < numTokens; j++) { for (int j = 0; j < numTokens; j++) {
values.add(randomAlphaOfLength(8)); values.add(randomAlphaOfLength(8));
} }
content.put("field" + i, values); stringContent.put("field" + i, values);
} }
List<String> fields = new ArrayList<>(content.keySet()); List<String> stringFields = new ArrayList<>(stringContent.keySet());
int numValues = randomIntBetween(16, 64);
List<Integer> intValues = new ArrayList<>(numValues);
for (int j = 0; j < numValues; j++) {
intValues.add(randomInt());
}
Collections.sort(intValues);
MappedFieldType intFieldType = mapperService.documentMapper("type").mappers()
.getMapper("int_field").fieldType();
List<Supplier<Query>> queryFunctions = new ArrayList<>(); List<Supplier<Query>> queryFunctions = new ArrayList<>();
queryFunctions.add(MatchNoDocsQuery::new); queryFunctions.add(MatchNoDocsQuery::new);
queryFunctions.add(MatchAllDocsQuery::new); queryFunctions.add(MatchAllDocsQuery::new);
queryFunctions.add(() -> new TermQuery(new Term("unknown_field", "value"))); queryFunctions.add(() -> new TermQuery(new Term("unknown_field", "value")));
String field1 = randomFrom(fields); String field1 = randomFrom(stringFields);
queryFunctions.add(() -> new TermQuery(new Term(field1, randomFrom(content.get(field1))))); queryFunctions.add(() -> new TermQuery(new Term(field1, randomFrom(stringContent.get(field1)))));
String field2 = randomFrom(fields); String field2 = randomFrom(stringFields);
queryFunctions.add(() -> new TermQuery(new Term(field2, randomFrom(content.get(field2))))); queryFunctions.add(() -> new TermQuery(new Term(field2, randomFrom(stringContent.get(field2)))));
queryFunctions.add(() -> new TermInSetQuery(field1, new BytesRef(randomFrom(content.get(field1))), queryFunctions.add(() -> intFieldType.termQuery(randomFrom(intValues), null));
new BytesRef(randomFrom(content.get(field1))))); queryFunctions.add(() -> intFieldType.termsQuery(Arrays.asList(randomFrom(intValues), randomFrom(intValues)), null));
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(content.get(field1))), queryFunctions.add(() -> intFieldType.rangeQuery(intValues.get(4), intValues.get(intValues.size() - 4), true,
new BytesRef(randomFrom(content.get(field1))))); true, ShapeRelation.WITHIN, null, null, null));
queryFunctions.add(() -> { queryFunctions.add(() -> new TermInSetQuery(field1, new BytesRef(randomFrom(stringContent.get(field1))),
BooleanQuery.Builder builder = new BooleanQuery.Builder(); new BytesRef(randomFrom(stringContent.get(field1)))));
int numClauses = randomIntBetween(1, 16); queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))),
for (int i = 0; i < numClauses; i++) { new BytesRef(randomFrom(stringContent.get(field1)))));
if (rarely()) { int numRandomBoolQueries = randomIntBetween(16, 32);
if (randomBoolean()) { for (int i = 0; i < numRandomBoolQueries; i++) {
Occur occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD)); queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues));
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), occur); }
} else {
String field = randomFrom(fields);
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), Occur.MUST_NOT);
}
} else {
if (randomBoolean()) {
Occur occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
String field = randomFrom(fields);
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur);
} else {
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), Occur.MUST_NOT);
}
}
}
return builder.build();
});
queryFunctions.add(() -> { queryFunctions.add(() -> {
int numClauses = randomIntBetween(1, 16); int numClauses = randomIntBetween(1, 16);
List<Query> clauses = new ArrayList<>(); List<Query> clauses = new ArrayList<>();
for (int i = 0; i < numClauses; i++) { for (int i = 0; i < numClauses; i++) {
String field = randomFrom(fields); String field = randomFrom(stringFields);
clauses.add(new TermQuery(new Term(field, randomFrom(content.get(field))))); clauses.add(new TermQuery(new Term(field, randomFrom(stringContent.get(field)))));
} }
return new DisjunctionMaxQuery(clauses, 0.01f); return new DisjunctionMaxQuery(clauses, 0.01f);
}); });
@ -237,14 +235,75 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
shardSearcher.setQueryCache(null); shardSearcher.setQueryCache(null);
Document document = new Document(); Document document = new Document();
for (Map.Entry<String, List<String>> entry : content.entrySet()) { for (Map.Entry<String, List<String>> entry : stringContent.entrySet()) {
String value = entry.getValue().stream().collect(Collectors.joining(" ")); String value = entry.getValue().stream().collect(Collectors.joining(" "));
document.add(new TextField(entry.getKey(), value, Field.Store.NO)); document.add(new TextField(entry.getKey(), value, Field.Store.NO));
} }
for (Integer intValue : intValues) {
List<Field> numberFields =
NumberFieldMapper.NumberType.INTEGER.createFields("int_field", intValue, true, true, false);
for (Field numberField : numberFields) {
document.add(numberField);
}
}
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer()); MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher); duelRun(queryStore, memoryIndex, shardSearcher);
} }
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content,
MappedFieldType intFieldType, List<Integer> intValues) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
int numClauses = randomIntBetween(1, 16);
int numShouldClauses = 0;
boolean onlyShouldClauses = rarely();
for (int i = 0; i < numClauses; i++) {
Occur occur;
if (onlyShouldClauses) {
occur = Occur.SHOULD;
if (randomBoolean()) {
String field = randomFrom(fields);
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur);
} else {
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur);
}
} else if (rarely() && depth <= 3) {
occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
builder.add(createRandomBooleanQuery(depth + 1, fields, content, intFieldType, intValues), occur);
} else if (rarely()) {
if (randomBoolean()) {
occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
if (randomBoolean()) {
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), occur);
} else {
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur);
}
} else if (randomBoolean()) {
String field = randomFrom(fields);
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur = Occur.MUST_NOT);
} else {
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur = Occur.MUST_NOT);
}
} else {
if (randomBoolean()) {
occur = randomFrom(Arrays.asList(Occur.FILTER, Occur.MUST, Occur.SHOULD));
if (randomBoolean()) {
String field = randomFrom(fields);
builder.add(new TermQuery(new Term(field, randomFrom(content.get(field)))), occur);
} else {
builder.add(intFieldType.termQuery(randomFrom(intValues), null), occur);
}
} else {
builder.add(new TermQuery(new Term("unknown_field", randomAlphaOfLength(8))), occur = Occur.MUST_NOT);
}
}
if (occur == Occur.SHOULD) {
numShouldClauses++;
}
}
builder.setMinimumNumberShouldMatch(numShouldClauses);
return builder.build();
}
public void testDuelIdBased() throws Exception { public void testDuelIdBased() throws Exception {
List<Function<String, Query>> queryFunctions = new ArrayList<>(); List<Function<String, Query>> queryFunctions = new ArrayList<>();
queryFunctions.add((id) -> new PrefixQuery(new Term("field", id))); queryFunctions.add((id) -> new PrefixQuery(new Term("field", id)));
@ -766,11 +825,11 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
Query percolateQuery = fieldType.percolateQuery("_name", queryStore, Query percolateQuery = fieldType.percolateQuery("_name", queryStore,
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT); Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery); Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery);
TopDocs topDocs = shardSearcher.search(query, 10); TopDocs topDocs = shardSearcher.search(query, 100);
Query controlQuery = new ControlQuery(memoryIndex, queryStore); Query controlQuery = new ControlQuery(memoryIndex, queryStore);
controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery); controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10); TopDocs controlTopDocs = shardSearcher.search(controlQuery, 100);
try { try {
assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits)); assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
@ -793,22 +852,39 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
logger.error("controlTopDocs.scoreDocs.length={}", controlTopDocs.scoreDocs.length); logger.error("controlTopDocs.scoreDocs.length={}", controlTopDocs.scoreDocs.length);
for (int i = 0; i < topDocs.scoreDocs.length; i++) { for (int i = 0; i < topDocs.scoreDocs.length; i++) {
logger.error("topDocs.scoreDocs[j].doc={}", topDocs.scoreDocs[i].doc); logger.error("topDocs.scoreDocs[{}].doc={}", i, topDocs.scoreDocs[i].doc);
logger.error("topDocs.scoreDocs[j].score={}", topDocs.scoreDocs[i].score); logger.error("topDocs.scoreDocs[{}].score={}", i, topDocs.scoreDocs[i].score);
} }
for (int i = 0; i < controlTopDocs.scoreDocs.length; i++) { for (int i = 0; i < controlTopDocs.scoreDocs.length; i++) {
logger.error("controlTopDocs.scoreDocs[j].doc={}", controlTopDocs.scoreDocs[i].doc); logger.error("controlTopDocs.scoreDocs[{}].doc={}", i, controlTopDocs.scoreDocs[i].doc);
logger.error("controlTopDocs.scoreDocs[j].score={}", controlTopDocs.scoreDocs[i].score); logger.error("controlTopDocs.scoreDocs[{}].score={}", i, controlTopDocs.scoreDocs[i].score);
// Additional stored information that is useful when debugging:
String queryToString = shardSearcher.doc(controlTopDocs.scoreDocs[i].doc).get("query_to_string");
logger.error("topDocs.scoreDocs[{}].query_to_string={}", i, queryToString);
NumericDocValues numericValues =
MultiDocValues.getNumericValues(shardSearcher.getIndexReader(), fieldType.minimumShouldMatchField.name());
boolean exact = numericValues.advanceExact(controlTopDocs.scoreDocs[i].doc);
if (exact) {
logger.error("controlTopDocs.scoreDocs[{}].minimum_should_match_field={}", i, numericValues.longValue());
} else {
// Some queries do not have a msm field. (e.g. unsupported queries)
logger.error("controlTopDocs.scoreDocs[{}].minimum_should_match_field=[NO_VALUE]", i);
}
} }
throw ae; throw ae;
} }
} }
private void addQuery(Query query, List<ParseContext.Document> docs) throws IOException { private void addQuery(Query query, List<ParseContext.Document> docs) {
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
mapperService.documentMapperParser(), documentMapper, null, null); mapperService.documentMapperParser(), documentMapper, null, null);
fieldMapper.processQuery(query, parseContext); fieldMapper.processQuery(query, parseContext);
docs.add(parseContext.doc()); ParseContext.Document queryDocument = parseContext.doc();
// Add to string representation of the query to make debugging easier:
queryDocument.add(new StoredField("query_to_string", query.toString()));
docs.add(queryDocument);
queries.add(query); queries.add(query);
} }
@ -865,8 +941,6 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
final IndexSearcher percolatorIndexSearcher = memoryIndex.createSearcher(); final IndexSearcher percolatorIndexSearcher = memoryIndex.createSearcher();
return new Weight(this) { return new Weight(this) {
float _score;
@Override @Override
public void extractTerms(Set<Term> terms) {} public void extractTerms(Set<Term> terms) {}
@ -889,6 +963,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
float _score[] = new float[]{boost};
DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc()); DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
CheckedFunction<Integer, Query, IOException> leaf = queryStore.getQueries(context); CheckedFunction<Integer, Query, IOException> leaf = queryStore.getQueries(context);
FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) { FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
@ -900,7 +975,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
TopDocs topDocs = percolatorIndexSearcher.search(query, 1); TopDocs topDocs = percolatorIndexSearcher.search(query, 1);
if (topDocs.totalHits > 0) { if (topDocs.totalHits > 0) {
if (needsScores) { if (needsScores) {
_score = topDocs.scoreDocs[0].score; _score[0] = topDocs.scoreDocs[0].score;
} }
return true; return true;
} else { } else {
@ -911,11 +986,21 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
} }
} }
}; };
return new FilterScorer(new ConstantScoreScorer(this, 1f, memoryIndexIterator)) { return new Scorer(this) {
@Override
public int docID() {
return memoryIndexIterator.docID();
}
@Override
public DocIdSetIterator iterator() {
return memoryIndexIterator;
}
@Override @Override
public float score() throws IOException { public float score() throws IOException {
return _score; return _score[0];
} }
}; };
} }

View File

@ -922,7 +922,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
assertThat(values.get(3), equalTo("field\0value4")); assertThat(values.get(3), equalTo("field\0value4"));
assertThat(values.get(4), equalTo("field\0value5")); assertThat(values.get(4), equalTo("field\0value5"));
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue(); msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
assertThat(msm, equalTo(3)); assertThat(msm, equalTo(1));
} }
private static byte[] subByteArray(byte[] source, int offset, int length) { private static byte[] subByteArray(byte[] source, int offset, int length) {