Make sure shared source always represents the top-level root document. (#66725)

We started passing down the root document's _source when processing
nested hits, to avoid reloading and reparsing the root source for each hit.
Unfortunately the approach did not work when there are multiple layers of
`inner_hits`. In this case, the second-layer inner hit received its immediate
parent's source instead of the root source. This parent source is filtered to
just contain the parts corresponding to the nested document, but the source
parsing logic is designed to always operate on the top-level root source. This
caused failures when loading the second-layer inner hits.

This PR makes sure to always pass the root document's _source when processing
inner hits, even if there are multiple layers.
This commit is contained in:
Julie Tibshirani 2021-01-05 08:17:26 -08:00
parent 0fad7f6fc6
commit ff67baac38
4 changed files with 158 additions and 11 deletions

View File

@ -92,3 +92,69 @@ setup:
- match: { hits.hits.0.fields._seq_no: [1] }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._version: 2 }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [1] }
---
"Inner hits with disabled _source":
- do:
indices.create:
index: disabled_source
body:
mappings:
_source:
enabled: false
properties:
nested_field:
type: nested
properties:
sub_nested_field:
type: nested
- do:
index:
index: disabled_source
id: 1
body:
nested_field:
field: value
sub_nested_field:
field: value
- do:
indices.refresh: {}
- do:
search:
index: disabled_source
rest_total_hits_as_int: true
body:
query:
nested:
path: "nested_field.sub_nested_field"
query: { match_all: {}}
inner_hits: {}
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._source
- do:
search:
index: disabled_source
rest_total_hits_as_int: true
body:
query:
nested:
path: "nested_field"
inner_hits: {}
query:
nested:
path: "nested_field.sub_nested_field"
query: { match_all: {}}
inner_hits: {}
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0._source
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._source

View File

@ -279,10 +279,14 @@ public class InnerHitsIT extends ESIntegTestCase {
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox")
.startArray("comments")
.startObject()
.field("message", "fox eat quick")
.startArray("remarks").startObject().field("message", "good").endObject().endArray()
.endObject()
.startObject()
.field("message", "fox eat quick")
.startArray("remarks").startObject().field("message", "good").endObject().endArray()
.endObject()
.startObject()
.field("message", "hippo is hungry")
.startArray("remarks").startObject().field("message", "neutral").endObject().endArray()
.endObject()
.endArray()
.endObject()));
requests.add(client().prepareIndex("articles", "article", "2").setSource(jsonBuilder().startObject()
@ -296,6 +300,7 @@ public class InnerHitsIT extends ESIntegTestCase {
.endObject()));
indexRandom(true, requests);
// Check we can load the first doubly-nested document.
SearchResponse response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
@ -322,6 +327,33 @@ public class InnerHitsIT extends ESIntegTestCase {
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));
// Check we can load the second doubly-nested document.
response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "neutral"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")),
ScoreMode.Avg).innerHit(new InnerHitBuilder())
).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertSearchHit(response, 1, hasId("1"));
assertThat(response.getHits().getAt(0).getInnerHits().size(), equalTo(1));
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
assertThat(innerHits.getTotalHits().value, equalTo(1L));
assertThat(innerHits.getHits().length, equalTo(1));
assertThat(innerHits.getAt(0).getId(), equalTo("1"));
assertThat(innerHits.getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(1));
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertThat(innerHits.getTotalHits().value, equalTo(1L));
assertThat(innerHits.getHits().length, equalTo(1));
assertThat(innerHits.getAt(0).getId(), equalTo("1"));
assertThat(innerHits.getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(1));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));
// Directly refer to the second level:
response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "bad"), ScoreMode.Avg)
@ -364,6 +396,34 @@ public class InnerHitsIT extends ESIntegTestCase {
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));
// Check that inner hits contain _source even when it's disabled on the parent request.
response = client().prepareSearch("articles")
.setFetchSource(false)
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "good"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")), ScoreMode.Avg)
.innerHit(new InnerHitBuilder())
).get();
assertNoFailures(response);
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertNotNull(innerHits.getAt(0).getSourceAsMap());
assertFalse(innerHits.getAt(0).getSourceAsMap().isEmpty());
response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "good"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")), ScoreMode.Avg)
.innerHit(new InnerHitBuilder().setFetchSourceContext(new FetchSourceContext(false)))
).get();
assertNoFailures(response);
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertNotNull(innerHits.getAt(0).getSourceAsMap());
assertFalse(innerHits.getAt(0).getSourceAsMap().isEmpty());
}
// Issue #9723

View File

@ -29,11 +29,13 @@ import org.elasticsearch.search.fetch.subphase.FetchFieldsContext;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.FieldAndFormat;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext.InnerHitSubContext;
import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.search.rescore.RescoreContext;
import java.util.Collections;
@ -204,4 +206,23 @@ public class FetchContext {
public SearchExtBuilder getSearchExt(String name) {
return searchContext.getSearchExt(name);
}
/**
* For a hit document that's being processed, return the source lookup representing the
* root document. This method is used to pass down the root source when processing this
* document's nested inner hits.
*
* @param hitContext The context of the hit that's being processed.
*/
public SourceLookup getRootSourceLookup(FetchSubPhase.HitContext hitContext) {
// Usually the root source simply belongs to the hit we're processing. But if
// there are multiple layers of inner hits and we're in a nested context, then
// the root source is found on the inner hits context.
if (searchContext instanceof InnerHitSubContext && hitContext.hit().getNestedIdentity() != null) {
InnerHitSubContext innerHitsContext = (InnerHitSubContext) searchContext;
return innerHitsContext.getRootLookup();
} else {
return hitContext.sourceLookup();
}
}
}

View File

@ -59,16 +59,16 @@ public final class InnerHitsPhase implements FetchSubPhase {
@Override
public void process(HitContext hitContext) throws IOException {
hitExecute(innerHits, hitContext);
SearchHit hit = hitContext.hit();
SourceLookup rootLookup = searchContext.getRootSourceLookup(hitContext);
hitExecute(innerHits, hit, rootLookup);
}
};
}
private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHits, HitContext hitContext) throws IOException {
SearchHit hit = hitContext.hit();
SourceLookup sourceLookup = hitContext.sourceLookup();
private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHits,
SearchHit hit,
SourceLookup rootLookup) throws IOException {
for (Map.Entry<String, InnerHitsContext.InnerHitSubContext> entry : innerHits.entrySet()) {
InnerHitsContext.InnerHitSubContext innerHitsContext = entry.getValue();
TopDocsAndMaxScore topDoc = innerHitsContext.topDocs(hit);
@ -84,7 +84,7 @@ public final class InnerHitsPhase implements FetchSubPhase {
}
innerHitsContext.docIdsToLoad(docIdsToLoad, 0, docIdsToLoad.length);
innerHitsContext.setRootId(new Uid(hit.getType(), hit.getId()));
innerHitsContext.setRootLookup(sourceLookup);
innerHitsContext.setRootLookup(rootLookup);
fetchPhase.execute(innerHitsContext);
FetchSearchResult fetchResult = innerHitsContext.fetchResult();