Fix regressions around nested hits and disabled _source. (#66572)

This PR fixes two bugs that can arise when _source is disabled and we fetch nested documents, and adds test coverage:
* Fix exception when highlighting `inner_hits` with disabled _source.
* Fix exception in nested `top_hits` with disabled _source.
* Add more tests for highlighting `inner_hits`.
This commit is contained in:
Julie Tibshirani 2020-12-18 14:06:52 -08:00
parent df8c92cfef
commit d4039228ae
3 changed files with 196 additions and 6 deletions

View File

@ -56,6 +56,7 @@ setup:
- do:
search:
rest_total_hits_as_int: true
index: my-index
body:
aggs:
to-users:
@ -81,12 +82,62 @@ setup:
- match: { aggregations.to-users.users.hits.hits.2._nested.field: users }
- match: { aggregations.to-users.users.hits.hits.2._nested.offset: 1 }
---
# Regression test: runs a `top_hits` aggregation under a `nested` aggregation
# against an index whose mapping disables _source.
"top_hits aggregation with nested documents and disabled _source":
# Skipped on versions up to 7.99.99; per the reason below, the fix is not backported yet.
- skip:
version: " - 7.99.99"
reason: "bug fix is not yet backported"
# Create a dedicated single-shard index with _source disabled and a nested `users` field.
- do:
indices.create:
index: disabled-source
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_source:
enabled: false
properties:
users:
type: nested
# Index one document containing a single nested `users` entry; refresh so it is searchable.
- do:
index:
index: disabled-source
id: 1
refresh: true
body:
users:
- first: "John"
last: "Smith"
# Search with a nested aggregation on `users` wrapping a default top_hits aggregation.
- do:
search:
index: disabled-source
rest_total_hits_as_int: true
body:
aggs:
to-users:
nested:
path: users
aggs:
users:
top_hits: {}
# The single nested hit must be returned with its identity (_id, _index, _nested
# field/offset) and without a _source section.
- match: { hits.total: 1 }
- length: { aggregations.to-users.users.hits.hits: 1 }
- match: { aggregations.to-users.users.hits.hits.0._id: "1" }
- match: { aggregations.to-users.users.hits.hits.0._index: disabled-source }
- match: { aggregations.to-users.users.hits.hits.0._nested.field: users }
- match: { aggregations.to-users.users.hits.hits.0._nested.offset: 0 }
- is_false: aggregations.to-users.users.hits.hits.0._source
---
"top_hits aggregation with sequence numbers":
- do:
search:
index: my-index
rest_total_hits_as_int: true
body:
aggs:

View File

@ -0,0 +1,137 @@
# Shared fixture: creates index `test` with a nested object field and indexes one
# document whose nested text fields all hold the same sentence.
setup:
- do:
indices.create:
index: test
body:
mappings:
# `nested.stored_only` is excluded from _source; it is still declared with store: true below.
_source:
excludes: ["nested.stored_only"]
properties:
nested:
type: nested
properties:
# Plain text field with two sub-fields: one with term vectors
# (positions + offsets) and one with offsets in the postings.
field:
type: text
fields:
vectors:
type: text
term_vector: "with_positions_offsets"
postings:
type: text
index_options: "offsets"
# Stored text field that is not listed in the _source excludes.
stored:
type: text
store: true
# Stored text field that is listed in the _source excludes above.
stored_only:
type: text
store: true
# Index a single document and refresh so the tests can search it immediately.
- do:
index:
index: test
id: 1
refresh: true
body:
nested:
field : "The quick brown fox is brown."
stored : "The quick brown fox is brown."
stored_only : "The quick brown fox is brown."
---
# Highlights `nested.field` and its `vectors` / `postings` sub-fields inside
# inner_hits of a nested query, using the unified highlighter.
"Unified highlighter":
- do:
search:
index: test
body:
query:
nested:
path: "nested"
query:
multi_match:
query: "quick brown fox"
fields: [ "nested.field", "nested.field.vectors", "nested.field.postings" ]
# Highlighting is requested on the inner hits, not on the top-level hit.
inner_hits:
highlight:
type: "unified"
fields:
nested.field: {}
nested.field.vectors: {}
nested.field.postings: {}
# All three fields are expected to produce the same highlighted fragment.
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>." }
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.vectors.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>." }
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>." }
---
# Highlights the two stored nested fields inside inner_hits: `nested.stored`
# and `nested.stored_only` (the latter is excluded from _source in the setup mapping).
"Unified highlighter with stored fields":
- do:
search:
index: test
body:
query:
nested:
path: "nested"
query:
multi_match:
query: "quick brown fox"
fields: [ "nested.stored", "nested.stored_only" ]
inner_hits:
highlight:
type: "unified"
fields:
nested.stored: {}
nested.stored_only: {}
# Both stored fields are expected to produce the same highlighted fragment.
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>." }
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>." }
---
# Regression test: highlighting inner_hits on an index with _source disabled.
# Uses its own index rather than the shared `test` index from setup.
"Unified highlighter with stored fields and disabled source":
# Skipped on versions up to 7.99.99; per the reason below, the fix is not backported yet.
- skip:
version: " - 7.99.99"
reason: "bug fix is not yet backported"
# Create an index with _source disabled, one non-stored nested text field
# (`field`) and one stored nested text field (`stored_only`).
- do:
indices.create:
index: disabled_source
body:
mappings:
_source:
enabled: false
properties:
nested:
type: nested
properties:
field:
type: text
stored_only:
type: text
store: true
# Index a single document and refresh so it is searchable.
- do:
index:
index: disabled_source
id: 1
refresh: true
body:
nested:
field: "The quick brown fox is brown."
stored_only: "The quick brown fox is brown."
# Query both nested fields and request unified highlighting on the inner hits.
- do:
search:
index: disabled_source
body:
query:
nested:
path: "nested"
query:
multi_match:
query: "quick brown fox"
fields: ["nested.field", "nested.stored_only"]
inner_hits:
highlight:
type: "unified"
fields:
nested.field: {}
nested.stored_only: {}
# Expect no highlight entry for the non-stored field, and a normal highlight
# for the stored field.
- is_false: hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}

View File

@ -32,7 +32,6 @@ import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.elasticsearch.Version;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
@ -378,10 +377,13 @@ public class FetchPhase {
rootId = rootFieldsVisitor.uid();
if (needSource) {
BytesReference rootSource = rootFieldsVisitor.source();
Tuple<XContentType, Map<String, Object>> tuple = XContentHelper.convertToMap(rootSource, false);
rootSourceAsMap = tuple.v2();
rootSourceContentType = tuple.v1();
if (rootFieldsVisitor.source() != null) {
Tuple<XContentType, Map<String, Object>> tuple = XContentHelper.convertToMap(rootFieldsVisitor.source(), false);
rootSourceAsMap = tuple.v2();
rootSourceContentType = tuple.v1();
} else {
rootSourceAsMap = Collections.emptyMap();
}
}
}
@ -413,7 +415,7 @@ public class FetchPhase {
nestedDocId,
new SourceLookup()); // Use a clean, fresh SourceLookup for the nested context
if (rootSourceAsMap != null) {
if (rootSourceAsMap != null && rootSourceAsMap.isEmpty() == false) {
// Isolate the nested json array object that matches with nested hit and wrap it back into the same json
// structure with the nested json array object being the actual content. The latter is important, so that
// features like source filtering and highlighting work consistent regardless of whether the field points