Fix nested document support in percolator query (#58149)

This commit ensures that we filter out nested documents
when retrieving the document slots of a matching query.

Closes #52850
This commit is contained in:
Jim Ferenczi 2020-06-17 22:29:59 +02:00 committed by jimczi
parent 5e0b00f022
commit a19213dcca
2 changed files with 39 additions and 14 deletions

View File

@ -20,6 +20,8 @@ package org.elasticsearch.percolator;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
@ -69,12 +71,8 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
for (PercolateQuery percolateQuery : percolateQueries) {
String fieldName = singlePercolateQuery ? FIELD_NAME_PREFIX : FIELD_NAME_PREFIX + "_" + percolateQuery.getName();
IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
// there is a bug in lucene's MemoryIndex that doesn't allow us to use docValues here...
// See https://issues.apache.org/jira/browse/LUCENE-8055
// for now we just use version 6.0 to find the nested parent
final Version version = Version.V_6_0_0; //context.mapperService().getIndexSettings().getIndexVersionCreated();
Weight weight = percolatorIndexSearcher.createWeight(percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(version)),
ScoreMode.COMPLETE_NO_SCORES, 1f);
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
BitSet rootDocs = BitSet.of(s.iterator(), memoryIndexMaxDoc);
@ -94,6 +92,13 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
// This is not a document with a percolator field.
continue;
}
if (hasNestedDocs) {
// Ensures that we filter out nested documents
query = new BooleanQuery.Builder()
.add(query, BooleanClause.Occur.MUST)
.add(nonNestedFilter, BooleanClause.Occur.FILTER)
.build();
}
TopDocs topDocs = percolatorIndexSearcher.search(query, memoryIndexMaxDoc, new Sort(SortField.FIELD_DOC));
if (topDocs.totalHits.value == 0) {

View File

@ -751,6 +751,7 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
public void testPercolateQueryWithNestedDocuments() throws Exception {
XContentBuilder mapping = XContentFactory.jsonBuilder();
mapping.startObject().startObject("properties").startObject("query").field("type", "percolator").endObject()
.startObject("id").field("type", "keyword").endObject()
.startObject("companyname").field("type", "text").endObject().startObject("employee").field("type", "nested")
.startObject("properties").startObject("name").field("type", "text").endObject().endObject().endObject().endObject()
.endObject();
@ -758,16 +759,24 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.addMapping("employee", mapping)
);
client().prepareIndex("test", "employee", "q1").setSource(jsonBuilder().startObject()
.field("id", "q1")
.field("query", QueryBuilders.nestedQuery("employee",
QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
).endObject())
.get();
// this query should never match as it doesn't use a nested query:
client().prepareIndex("test", "employee", "q2").setSource(jsonBuilder().startObject()
.field("id", "q2")
.field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
.get();
client().admin().indices().prepareRefresh().get();
client().prepareIndex("test", "employee", "q3").setSource(jsonBuilder().startObject()
.field("id", "q3")
.field("query", QueryBuilders.matchAllQuery()).endObject())
.get();
client().admin().indices().prepareRefresh().get();
SearchResponse response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query",
BytesReference.bytes(XContentFactory.jsonBuilder()
@ -777,10 +786,11 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.startObject().field("name", "tony stark").endObject()
.endArray()
.endObject()), XContentType.JSON))
.addSort("_doc", SortOrder.ASC)
.addSort("id", SortOrder.ASC)
.get();
assertHitCount(response, 1);
assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
assertThat(response.getHits().getAt(1).getId(), equalTo("q3"));
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query",
@ -791,17 +801,19 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.startObject().field("name", "tony stark").endObject()
.endArray()
.endObject()), XContentType.JSON))
.addSort("_doc", SortOrder.ASC)
.addSort("id", SortOrder.ASC)
.get();
assertHitCount(response, 0);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("q3"));
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query",
BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject()),
XContentType.JSON))
.addSort("_doc", SortOrder.ASC)
.addSort("id", SortOrder.ASC)
.get();
assertHitCount(response, 0);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("q3"));
response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", Arrays.asList(
@ -818,13 +830,21 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.startObject().field("name", "peter parker").endObject()
.startObject().field("name", "virginia potts").endObject()
.endArray()
.endObject()),
BytesReference.bytes(XContentFactory.jsonBuilder()
.startObject().field("companyname", "stark")
.startArray("employee")
.startObject().field("name", "peter parker").endObject()
.endArray()
.endObject())
), XContentType.JSON))
.addSort("_doc", SortOrder.ASC)
.addSort("id", SortOrder.ASC)
.get();
assertHitCount(response, 1);
assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
assertThat(response.getHits().getAt(0).getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0, 1)));
assertThat(response.getHits().getAt(1).getId(), equalTo("q3"));
assertThat(response.getHits().getAt(1).getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0, 1, 2)));
}
public void testPercolatorQueryViaMultiSearch() throws Exception {