Fix nested document support in percolator query (#58149)

This commit ensures that nested documents are filtered out
when retrieving the document slots of a matching query, so that only root documents are reported as matching slots.

Closes #52850
This commit is contained in:
Jim Ferenczi 2020-06-17 22:29:59 +02:00 committed by jimczi
parent 5e0b00f022
commit a19213dcca
2 changed files with 39 additions and 14 deletions

View File

@ -20,6 +20,8 @@ package org.elasticsearch.percolator;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -69,12 +71,8 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
for (PercolateQuery percolateQuery : percolateQueries) { for (PercolateQuery percolateQuery : percolateQueries) {
String fieldName = singlePercolateQuery ? FIELD_NAME_PREFIX : FIELD_NAME_PREFIX + "_" + percolateQuery.getName(); String fieldName = singlePercolateQuery ? FIELD_NAME_PREFIX : FIELD_NAME_PREFIX + "_" + percolateQuery.getName();
IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher(); IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
// there is a bug in lucene's MemoryIndex that doesn't allow us to use docValues here... Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
// See https://issues.apache.org/jira/browse/LUCENE-8055 Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
// for now we just use version 6.0 version to find nested parent
final Version version = Version.V_6_0_0; //context.mapperService().getIndexSettings().getIndexVersionCreated();
Weight weight = percolatorIndexSearcher.createWeight(percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(version)),
ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0)); Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc(); int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
BitSet rootDocs = BitSet.of(s.iterator(), memoryIndexMaxDoc); BitSet rootDocs = BitSet.of(s.iterator(), memoryIndexMaxDoc);
@ -94,6 +92,13 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
// This is not a document with a percolator field. // This is not a document with a percolator field.
continue; continue;
} }
if (hasNestedDocs) {
// Ensures that we filter out nested documents
query = new BooleanQuery.Builder()
.add(query, BooleanClause.Occur.MUST)
.add(nonNestedFilter, BooleanClause.Occur.FILTER)
.build();
}
TopDocs topDocs = percolatorIndexSearcher.search(query, memoryIndexMaxDoc, new Sort(SortField.FIELD_DOC)); TopDocs topDocs = percolatorIndexSearcher.search(query, memoryIndexMaxDoc, new Sort(SortField.FIELD_DOC));
if (topDocs.totalHits.value == 0) { if (topDocs.totalHits.value == 0) {

View File

@ -751,6 +751,7 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
public void testPercolateQueryWithNestedDocuments() throws Exception { public void testPercolateQueryWithNestedDocuments() throws Exception {
XContentBuilder mapping = XContentFactory.jsonBuilder(); XContentBuilder mapping = XContentFactory.jsonBuilder();
mapping.startObject().startObject("properties").startObject("query").field("type", "percolator").endObject() mapping.startObject().startObject("properties").startObject("query").field("type", "percolator").endObject()
.startObject("id").field("type", "keyword").endObject()
.startObject("companyname").field("type", "text").endObject().startObject("employee").field("type", "nested") .startObject("companyname").field("type", "text").endObject().startObject("employee").field("type", "nested")
.startObject("properties").startObject("name").field("type", "text").endObject().endObject().endObject().endObject() .startObject("properties").startObject("name").field("type", "text").endObject().endObject().endObject().endObject()
.endObject(); .endObject();
@ -758,16 +759,24 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.addMapping("employee", mapping) .addMapping("employee", mapping)
); );
client().prepareIndex("test", "employee", "q1").setSource(jsonBuilder().startObject() client().prepareIndex("test", "employee", "q1").setSource(jsonBuilder().startObject()
.field("id", "q1")
.field("query", QueryBuilders.nestedQuery("employee", .field("query", QueryBuilders.nestedQuery("employee",
QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg) QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
).endObject()) ).endObject())
.get(); .get();
// this query should never match as it doesn't use nested query: // this query should never match as it doesn't use nested query:
client().prepareIndex("test", "employee", "q2").setSource(jsonBuilder().startObject() client().prepareIndex("test", "employee", "q2").setSource(jsonBuilder().startObject()
.field("id", "q2")
.field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject()) .field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
.get(); .get();
client().admin().indices().prepareRefresh().get(); client().admin().indices().prepareRefresh().get();
client().prepareIndex("test", "employee", "q3").setSource(jsonBuilder().startObject()
.field("id", "q3")
.field("query", QueryBuilders.matchAllQuery()).endObject())
.get();
client().admin().indices().prepareRefresh().get();
SearchResponse response = client().prepareSearch() SearchResponse response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", .setQuery(new PercolateQueryBuilder("query",
BytesReference.bytes(XContentFactory.jsonBuilder() BytesReference.bytes(XContentFactory.jsonBuilder()
@ -777,10 +786,11 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.startObject().field("name", "tony stark").endObject() .startObject().field("name", "tony stark").endObject()
.endArray() .endArray()
.endObject()), XContentType.JSON)) .endObject()), XContentType.JSON))
.addSort("_doc", SortOrder.ASC) .addSort("id", SortOrder.ASC)
.get(); .get();
assertHitCount(response, 1); assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("q1")); assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
assertThat(response.getHits().getAt(1).getId(), equalTo("q3"));
response = client().prepareSearch() response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", .setQuery(new PercolateQueryBuilder("query",
@ -791,17 +801,19 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.startObject().field("name", "tony stark").endObject() .startObject().field("name", "tony stark").endObject()
.endArray() .endArray()
.endObject()), XContentType.JSON)) .endObject()), XContentType.JSON))
.addSort("_doc", SortOrder.ASC) .addSort("id", SortOrder.ASC)
.get(); .get();
assertHitCount(response, 0); assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("q3"));
response = client().prepareSearch() response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", .setQuery(new PercolateQueryBuilder("query",
BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject()), BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject()),
XContentType.JSON)) XContentType.JSON))
.addSort("_doc", SortOrder.ASC) .addSort("id", SortOrder.ASC)
.get(); .get();
assertHitCount(response, 0); assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getId(), equalTo("q3"));
response = client().prepareSearch() response = client().prepareSearch()
.setQuery(new PercolateQueryBuilder("query", Arrays.asList( .setQuery(new PercolateQueryBuilder("query", Arrays.asList(
@ -818,13 +830,21 @@ public class PercolatorQuerySearchIT extends ESIntegTestCase {
.startObject().field("name", "peter parker").endObject() .startObject().field("name", "peter parker").endObject()
.startObject().field("name", "virginia potts").endObject() .startObject().field("name", "virginia potts").endObject()
.endArray() .endArray()
.endObject()),
BytesReference.bytes(XContentFactory.jsonBuilder()
.startObject().field("companyname", "stark")
.startArray("employee")
.startObject().field("name", "peter parker").endObject()
.endArray()
.endObject()) .endObject())
), XContentType.JSON)) ), XContentType.JSON))
.addSort("_doc", SortOrder.ASC) .addSort("id", SortOrder.ASC)
.get(); .get();
assertHitCount(response, 1); assertHitCount(response, 2);
assertThat(response.getHits().getAt(0).getId(), equalTo("q1")); assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
assertThat(response.getHits().getAt(0).getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0, 1))); assertThat(response.getHits().getAt(0).getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0, 1)));
assertThat(response.getHits().getAt(1).getId(), equalTo("q3"));
assertThat(response.getHits().getAt(1).getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0, 1, 2)));
} }
public void testPercolatorQueryViaMultiSearch() throws Exception { public void testPercolatorQueryViaMultiSearch() throws Exception {