Ensure nested documents have consistent version and seq_ids (#27455)
Today we index dummy values for seq_ids and version on nested documents. This is on the one hand trappy since users can request these values via inner hits and on the other hand not necessarily good for compression since the dummy value will likely not compress well when seqIDs are lowish. This change ensures that we share the same field values for all documents in a nested block. This won't have any overhead, in-fact it might be more efficient since we even reduce the work needed slightly.
This commit is contained in:
parent
d3e3bc8656
commit
720e96e288
|
@ -245,15 +245,18 @@ public class SeqNoFieldMapper extends MetadataFieldMapper {
|
|||
|
||||
@Override
|
||||
public void postParse(ParseContext context) throws IOException {
|
||||
// In the case of nested docs, let's fill nested docs with seqNo=1 and
|
||||
// primaryTerm=0 so that Lucene doesn't write a Bitset for documents
|
||||
// that don't have the field. This is consistent with the default value
|
||||
// In the case of nested docs, let's fill nested docs with the original
|
||||
// so that Lucene doesn't write a Bitset for documents that
|
||||
// don't have the field. This is consistent with the default value
|
||||
// for efficiency.
|
||||
// we share the parent docs fields to ensure good compression
|
||||
SequenceIDFields seqID = context.seqID();
|
||||
assert seqID != null;
|
||||
for (int i = 1; i < context.docs().size(); i++) {
|
||||
final Document doc = context.docs().get(i);
|
||||
doc.add(new LongPoint(NAME, 1));
|
||||
doc.add(new NumericDocValuesField(NAME, 1L));
|
||||
doc.add(new NumericDocValuesField(PRIMARY_TERM_NAME, 0L));
|
||||
doc.add(seqID.seqNo);
|
||||
doc.add(seqID.seqNoDocValue);
|
||||
doc.add(seqID.primaryTerm);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -126,9 +126,11 @@ public class VersionFieldMapper extends MetadataFieldMapper {
|
|||
public void postParse(ParseContext context) throws IOException {
|
||||
// In the case of nested docs, let's fill nested docs with version=1 so that Lucene doesn't write a Bitset for documents
|
||||
// that don't have the field. This is consistent with the default value for efficiency.
|
||||
Field version = context.version();
|
||||
assert version != null;
|
||||
for (int i = 1; i < context.docs().size(); i++) {
|
||||
final Document doc = context.docs().get(i);
|
||||
doc.add(new NumericDocValuesField(NAME, 1L));
|
||||
doc.add(version);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,3 +41,60 @@ setup:
|
|||
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._nested.offset: 0 }
|
||||
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0._nested.child
|
||||
|
||||
|
||||
---
|
||||
"Nested doc version and seqIDs":
|
||||
|
||||
- skip:
|
||||
# fixed in 7.0
|
||||
version: " - 6.99.99"
|
||||
reason: "version and seq IDs where not accurate in previous versions"
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: type_1
|
||||
id: 1
|
||||
body:
|
||||
"nested_field" : [ { "foo": "bar" } ]
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
search:
|
||||
body: { "query" : { "nested" : { "path" : "nested_field", "query" : { "match_all" : {} }, "inner_hits" : { version: true, "docvalue_fields": ["_seq_no"]} }}, "version": true, "docvalue_fields" : ["_seq_no"] }
|
||||
|
||||
- match: { hits.total: 1 }
|
||||
- match: { hits.hits.0._index: "test" }
|
||||
- match: { hits.hits.0._type: "type_1" }
|
||||
- match: { hits.hits.0._id: "1" }
|
||||
- match: { hits.hits.0._version: 1 }
|
||||
- match: { hits.hits.0.fields._seq_no: [0] }
|
||||
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [0] }
|
||||
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: type_1
|
||||
id: 1
|
||||
body:
|
||||
"nested_field" : [ { "foo": "baz" } ]
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
search:
|
||||
body: { "query" : { "nested" : { "path" : "nested_field", "query" : { "match_all" : {} }, "inner_hits" : { version: true, "docvalue_fields": ["_seq_no"]} }}, "version": true, "docvalue_fields" : ["_seq_no"] }
|
||||
|
||||
- match: { hits.total: 1 }
|
||||
- match: { hits.hits.0._index: "test" }
|
||||
- match: { hits.hits.0._type: "type_1" }
|
||||
- match: { hits.hits.0._id: "1" }
|
||||
- match: { hits.hits.0._version: 2 }
|
||||
- match: { hits.hits.0.fields._seq_no: [1] }
|
||||
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._version: 2 }
|
||||
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [1] }
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue