percolator: Do not take duplicate query extractions into account for minimum_should_match attribute

If a percolator query contains duplicate query clauses somewhere in the query tree then
when these clauses are extracted then they should not affect the msm.

This can lead a percolator query that should be a valid match not become a candidate match,
because at query time, the msm that is being used by the CoveringQuery would never match with
the msm used at index time.

Closes #28315
This commit is contained in:
Martijn van Groningen 2018-01-24 08:51:34 +01:00
parent 1d01bcf421
commit 204f4022c2
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
4 changed files with 207 additions and 2 deletions

View File

@ -380,7 +380,21 @@ final class QueryAnalyzer {
msm += 1;
}
} else {
msm += result.minimumShouldMatch;
// In case that there are duplicate query extractions we need to be careful with incrementing msm,
// because that could lead to valid matches not becoming candidate matches:
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
// doc: field: val1 val2 val3
// So lets be protective and decrease the msm:
int resultMsm = result.minimumShouldMatch;
for (QueryExtraction queryExtraction : result.extractions) {
if (extractions.contains(queryExtraction)) {
// To protect against negative msm:
// (sub results could consist out of disjunction and conjunction and
// then we do not know which extraction contributed to msm)
resultMsm = Math.max(0, resultMsm - 1);
}
}
msm += resultMsm;
}
verified &= result.verified;
matchAllDocs &= result.matchAllDocs;
@ -519,10 +533,16 @@ final class QueryAnalyzer {
if (subResult.matchAllDocs) {
numMatchAllClauses++;
}
int resultMsm = subResult.minimumShouldMatch;
for (QueryExtraction extraction : subResult.extractions) {
if (terms.contains(extraction)) {
resultMsm = Math.max(1, resultMsm - 1);
}
}
msmPerClause[i] = resultMsm;
terms.addAll(subResult.extractions);
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
msmPerClause[i] = subResult.minimumShouldMatch;
if (subResult.extractions.size() == 1 && t[0].range != null) {
rangeFieldNames[i] = t[0].range.fieldName;
}

View File

@ -72,6 +72,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.Version;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -622,6 +623,55 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
}
}
public void testDuplicatedClauses() throws Exception {
List<ParseContext.Document> docs = new ArrayList<>();
BooleanQuery.Builder builder = new BooleanQuery.Builder();
BooleanQuery.Builder builder1 = new BooleanQuery.Builder();
builder1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
builder1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
builder.add(builder1.build(), BooleanClause.Occur.MUST);
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
builder2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
builder2.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
builder.add(builder2.build(), BooleanClause.Occur.MUST);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2);
builder1 = new BooleanQuery.Builder();
builder1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
builder1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
builder.add(builder1.build(), BooleanClause.Occur.SHOULD);
builder2 = new BooleanQuery.Builder();
builder2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
builder2.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
builder.add(builder2.build(), BooleanClause.Occur.SHOULD);
BooleanQuery.Builder builder3 = new BooleanQuery.Builder();
builder3.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
builder3.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST);
builder.add(builder3.build(), BooleanClause.Occur.SHOULD);
addQuery(builder.build(), docs);
indexWriter.addDocuments(docs);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
shardSearcher.setQueryCache(null);
Version v = Version.CURRENT;
List<BytesReference> sources = Collections.singletonList(new BytesArray("{}"));
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value1 value2 value3", new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore, sources, percolateSearcher, v);
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
assertEquals(2L, topDocs.totalHits);
assertEquals(0, topDocs.scoreDocs[0].doc);
assertEquals(1, topDocs.scoreDocs[1].doc);
}
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
boolean requireScore = randomBoolean();
IndexSearcher percolateSearcher = memoryIndex.createSearcher();

View File

@ -100,8 +100,10 @@ import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
@ -850,6 +852,79 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
}
}
public void testDuplicatedClauses() throws Exception {
addQueryFieldMappings();
QueryBuilder qb = boolQuery()
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
.must(boolQuery().must(termQuery("field", "value2")).must(termQuery("field", "value3")));
ParsedDocument doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
XContentFactory.jsonBuilder().startObject()
.field(fieldName, qb)
.endObject().bytes(),
XContentType.JSON));
List<String> values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
.map(f -> f.binaryValue().utf8ToString())
.sorted()
.collect(Collectors.toList());
assertThat(values.size(), equalTo(3));
assertThat(values.get(0), equalTo("field\0value1"));
assertThat(values.get(1), equalTo("field\0value2"));
assertThat(values.get(2), equalTo("field\0value3"));
int msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
assertThat(msm, equalTo(3));
qb = boolQuery()
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
.must(boolQuery().must(termQuery("field", "value2")).must(termQuery("field", "value3")))
.must(boolQuery().must(termQuery("field", "value3")).must(termQuery("field", "value4")))
.must(boolQuery().should(termQuery("field", "value4")).should(termQuery("field", "value5")));
doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
XContentFactory.jsonBuilder().startObject()
.field(fieldName, qb)
.endObject().bytes(),
XContentType.JSON));
values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
.map(f -> f.binaryValue().utf8ToString())
.sorted()
.collect(Collectors.toList());
assertThat(values.size(), equalTo(5));
assertThat(values.get(0), equalTo("field\0value1"));
assertThat(values.get(1), equalTo("field\0value2"));
assertThat(values.get(2), equalTo("field\0value3"));
assertThat(values.get(3), equalTo("field\0value4"));
assertThat(values.get(4), equalTo("field\0value5"));
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
assertThat(msm, equalTo(4));
qb = boolQuery()
.minimumShouldMatch(3)
.should(boolQuery().should(termQuery("field", "value1")).should(termQuery("field", "value2")))
.should(boolQuery().should(termQuery("field", "value2")).should(termQuery("field", "value3")))
.should(boolQuery().should(termQuery("field", "value3")).should(termQuery("field", "value4")))
.should(boolQuery().should(termQuery("field", "value4")).should(termQuery("field", "value5")));
doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
XContentFactory.jsonBuilder().startObject()
.field(fieldName, qb)
.endObject().bytes(),
XContentType.JSON));
values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
.map(f -> f.binaryValue().utf8ToString())
.sorted()
.collect(Collectors.toList());
assertThat(values.size(), equalTo(5));
assertThat(values.get(0), equalTo("field\0value1"));
assertThat(values.get(1), equalTo("field\0value2"));
assertThat(values.get(2), equalTo("field\0value3"));
assertThat(values.get(3), equalTo("field\0value4"));
assertThat(values.get(4), equalTo("field\0value5"));
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
assertThat(msm, equalTo(3));
}
private static byte[] subByteArray(byte[] source, int offset, int length) {
return Arrays.copyOfRange(source, offset, offset + length);
}

View File

@ -1108,6 +1108,66 @@ public class QueryAnalyzerTests extends ESTestCase {
assertEquals("_field1", new ArrayList<>(result.extractions).get(1).range.fieldName);
}
public void testExtractQueryMetadata_duplicatedClauses() {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
.build(),
BooleanClause.Occur.MUST
);
builder.add(
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
.build(),
BooleanClause.Occur.MUST
);
builder.add(
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST)
.build(),
BooleanClause.Occur.MUST
);
Result result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(4));
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
new Term("field", "value3"), new Term("field", "value4"));
builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(2);
builder.add(
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
.build(),
BooleanClause.Occur.SHOULD
);
builder.add(
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
.build(),
BooleanClause.Occur.SHOULD
);
builder.add(
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST)
.build(),
BooleanClause.Occur.SHOULD
);
result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(2));
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
new Term("field", "value3"), new Term("field", "value4"));
}
private static void assertDimension(byte[] expected, Consumer<byte[]> consumer) {
byte[] dest = new byte[expected.length];
consumer.accept(dest);