percolator: Do not take duplicate query extractions into account for minimum_should_match attribute
If a percolator query contains duplicate query clauses somewhere in the query tree then when these clauses are extracted then they should not affect the msm. This can lead a percolator query that should be a valid match not become a candidate match, because at query time, the msm that is being used by the CoveringQuery would never match with the msm used at index time. Closes #28315
This commit is contained in:
parent
1d01bcf421
commit
204f4022c2
|
@ -380,7 +380,21 @@ final class QueryAnalyzer {
|
|||
msm += 1;
|
||||
}
|
||||
} else {
|
||||
msm += result.minimumShouldMatch;
|
||||
// In case that there are duplicate query extractions we need to be careful with incrementing msm,
|
||||
// because that could lead to valid matches not becoming candidate matches:
|
||||
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
|
||||
// doc: field: val1 val2 val3
|
||||
// So lets be protective and decrease the msm:
|
||||
int resultMsm = result.minimumShouldMatch;
|
||||
for (QueryExtraction queryExtraction : result.extractions) {
|
||||
if (extractions.contains(queryExtraction)) {
|
||||
// To protect against negative msm:
|
||||
// (sub results could consist out of disjunction and conjunction and
|
||||
// then we do not know which extraction contributed to msm)
|
||||
resultMsm = Math.max(0, resultMsm - 1);
|
||||
}
|
||||
}
|
||||
msm += resultMsm;
|
||||
}
|
||||
verified &= result.verified;
|
||||
matchAllDocs &= result.matchAllDocs;
|
||||
|
@ -519,10 +533,16 @@ final class QueryAnalyzer {
|
|||
if (subResult.matchAllDocs) {
|
||||
numMatchAllClauses++;
|
||||
}
|
||||
int resultMsm = subResult.minimumShouldMatch;
|
||||
for (QueryExtraction extraction : subResult.extractions) {
|
||||
if (terms.contains(extraction)) {
|
||||
resultMsm = Math.max(1, resultMsm - 1);
|
||||
}
|
||||
}
|
||||
msmPerClause[i] = resultMsm;
|
||||
terms.addAll(subResult.extractions);
|
||||
|
||||
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
|
||||
msmPerClause[i] = subResult.minimumShouldMatch;
|
||||
if (subResult.extractions.size() == 1 && t[0].range != null) {
|
||||
rangeFieldNames[i] = t[0].range.fieldName;
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@ import org.apache.lucene.store.RAMDirectory;
|
|||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.CheckedFunction;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
|
@ -622,6 +623,55 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testDuplicatedClauses() throws Exception {
|
||||
List<ParseContext.Document> docs = new ArrayList<>();
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
BooleanQuery.Builder builder1 = new BooleanQuery.Builder();
|
||||
builder1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
|
||||
builder1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
|
||||
builder.add(builder1.build(), BooleanClause.Occur.MUST);
|
||||
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
|
||||
builder2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
|
||||
builder2.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
|
||||
builder.add(builder2.build(), BooleanClause.Occur.MUST);
|
||||
addQuery(builder.build(), docs);
|
||||
|
||||
builder = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(2);
|
||||
builder1 = new BooleanQuery.Builder();
|
||||
builder1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
|
||||
builder1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
|
||||
builder.add(builder1.build(), BooleanClause.Occur.SHOULD);
|
||||
builder2 = new BooleanQuery.Builder();
|
||||
builder2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
|
||||
builder2.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
|
||||
builder.add(builder2.build(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery.Builder builder3 = new BooleanQuery.Builder();
|
||||
builder3.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
|
||||
builder3.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST);
|
||||
builder.add(builder3.build(), BooleanClause.Occur.SHOULD);
|
||||
addQuery(builder.build(), docs);
|
||||
|
||||
indexWriter.addDocuments(docs);
|
||||
indexWriter.close();
|
||||
directoryReader = DirectoryReader.open(directory);
|
||||
IndexSearcher shardSearcher = newSearcher(directoryReader);
|
||||
shardSearcher.setQueryCache(null);
|
||||
|
||||
Version v = Version.CURRENT;
|
||||
List<BytesReference> sources = Collections.singletonList(new BytesArray("{}"));
|
||||
|
||||
MemoryIndex memoryIndex = new MemoryIndex();
|
||||
memoryIndex.addField("field", "value1 value2 value3", new WhitespaceAnalyzer());
|
||||
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
|
||||
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore, sources, percolateSearcher, v);
|
||||
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
|
||||
assertEquals(2L, topDocs.totalHits);
|
||||
assertEquals(0, topDocs.scoreDocs[0].doc);
|
||||
assertEquals(1, topDocs.scoreDocs[1].doc);
|
||||
}
|
||||
|
||||
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
|
||||
boolean requireScore = randomBoolean();
|
||||
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
|
||||
|
|
|
@ -100,8 +100,10 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
||||
|
@ -850,6 +852,79 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testDuplicatedClauses() throws Exception {
|
||||
addQueryFieldMappings();
|
||||
|
||||
QueryBuilder qb = boolQuery()
|
||||
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
|
||||
.must(boolQuery().must(termQuery("field", "value2")).must(termQuery("field", "value3")));
|
||||
ParsedDocument doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
|
||||
XContentFactory.jsonBuilder().startObject()
|
||||
.field(fieldName, qb)
|
||||
.endObject().bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
List<String> values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
|
||||
.map(f -> f.binaryValue().utf8ToString())
|
||||
.sorted()
|
||||
.collect(Collectors.toList());
|
||||
assertThat(values.size(), equalTo(3));
|
||||
assertThat(values.get(0), equalTo("field\0value1"));
|
||||
assertThat(values.get(1), equalTo("field\0value2"));
|
||||
assertThat(values.get(2), equalTo("field\0value3"));
|
||||
int msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
|
||||
assertThat(msm, equalTo(3));
|
||||
|
||||
qb = boolQuery()
|
||||
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
|
||||
.must(boolQuery().must(termQuery("field", "value2")).must(termQuery("field", "value3")))
|
||||
.must(boolQuery().must(termQuery("field", "value3")).must(termQuery("field", "value4")))
|
||||
.must(boolQuery().should(termQuery("field", "value4")).should(termQuery("field", "value5")));
|
||||
doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
|
||||
XContentFactory.jsonBuilder().startObject()
|
||||
.field(fieldName, qb)
|
||||
.endObject().bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
|
||||
.map(f -> f.binaryValue().utf8ToString())
|
||||
.sorted()
|
||||
.collect(Collectors.toList());
|
||||
assertThat(values.size(), equalTo(5));
|
||||
assertThat(values.get(0), equalTo("field\0value1"));
|
||||
assertThat(values.get(1), equalTo("field\0value2"));
|
||||
assertThat(values.get(2), equalTo("field\0value3"));
|
||||
assertThat(values.get(3), equalTo("field\0value4"));
|
||||
assertThat(values.get(4), equalTo("field\0value5"));
|
||||
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
|
||||
assertThat(msm, equalTo(4));
|
||||
|
||||
qb = boolQuery()
|
||||
.minimumShouldMatch(3)
|
||||
.should(boolQuery().should(termQuery("field", "value1")).should(termQuery("field", "value2")))
|
||||
.should(boolQuery().should(termQuery("field", "value2")).should(termQuery("field", "value3")))
|
||||
.should(boolQuery().should(termQuery("field", "value3")).should(termQuery("field", "value4")))
|
||||
.should(boolQuery().should(termQuery("field", "value4")).should(termQuery("field", "value5")));
|
||||
doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
|
||||
XContentFactory.jsonBuilder().startObject()
|
||||
.field(fieldName, qb)
|
||||
.endObject().bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
|
||||
.map(f -> f.binaryValue().utf8ToString())
|
||||
.sorted()
|
||||
.collect(Collectors.toList());
|
||||
assertThat(values.size(), equalTo(5));
|
||||
assertThat(values.get(0), equalTo("field\0value1"));
|
||||
assertThat(values.get(1), equalTo("field\0value2"));
|
||||
assertThat(values.get(2), equalTo("field\0value3"));
|
||||
assertThat(values.get(3), equalTo("field\0value4"));
|
||||
assertThat(values.get(4), equalTo("field\0value5"));
|
||||
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
|
||||
assertThat(msm, equalTo(3));
|
||||
}
|
||||
|
||||
private static byte[] subByteArray(byte[] source, int offset, int length) {
|
||||
return Arrays.copyOfRange(source, offset, offset + length);
|
||||
}
|
||||
|
|
|
@ -1108,6 +1108,66 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
assertEquals("_field1", new ArrayList<>(result.extractions).get(1).range.fieldName);
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_duplicatedClauses() {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
BooleanClause.Occur.MUST
|
||||
);
|
||||
builder.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
BooleanClause.Occur.MUST
|
||||
);
|
||||
builder.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
BooleanClause.Occur.MUST
|
||||
);
|
||||
Result result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(4));
|
||||
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
|
||||
new Term("field", "value3"), new Term("field", "value4"));
|
||||
|
||||
builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(2);
|
||||
builder.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
BooleanClause.Occur.SHOULD
|
||||
);
|
||||
builder.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
BooleanClause.Occur.SHOULD
|
||||
);
|
||||
builder.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
BooleanClause.Occur.SHOULD
|
||||
);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
|
||||
new Term("field", "value3"), new Term("field", "value4"));
|
||||
}
|
||||
|
||||
private static void assertDimension(byte[] expected, Consumer<byte[]> consumer) {
|
||||
byte[] dest = new byte[expected.length];
|
||||
consumer.accept(dest);
|
||||
|
|
Loading…
Reference in New Issue