mirror of https://github.com/apache/lucene.git
Deduplicate bytes for `FieldReader#rootCode` (#13610)
Looking at how these instances are serialized to disk, it appears that the empty output in the FST metadata is always the same as the rootCode bytes. Without changing the serialization, we could at least deduplicate here, saving hundreds of MB in some high-segment-count use cases I observed in ES.
This commit is contained in:
parent
255a2fcf9c
commit
ca098e63b9
|
@ -78,7 +78,6 @@ public final class FieldReader extends Terms {
|
|||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.docCount = docCount;
|
||||
this.rootCode = rootCode;
|
||||
this.minTerm = minTerm;
|
||||
this.maxTerm = maxTerm;
|
||||
// if (DEBUG) {
|
||||
|
@ -100,6 +99,14 @@ public final class FieldReader extends Terms {
|
|||
w.close();
|
||||
}
|
||||
*/
|
||||
BytesRef emptyOutput = metadata.getEmptyOutput();
|
||||
if (rootCode.equals(emptyOutput) == false) {
|
||||
// TODO: this branch is never taken
|
||||
assert false;
|
||||
this.rootCode = rootCode;
|
||||
} else {
|
||||
this.rootCode = emptyOutput;
|
||||
}
|
||||
}
|
||||
|
||||
long readVLongOutput(DataInput in) throws IOException {
|
||||
|
|
Loading…
Reference in New Issue