mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 08:19:23 +00:00
Deduplicate min and max term in single-term FieldReader (#13618)
I noticed that single-term readers, while an edge case, are not that uncommon in Elasticsearch heap dumps. It is quite common for a field to have a constant value across a complete segment (e.g. a version value that is repeated endlessly in logs). It seems simple enough to deduplicate the min and max term here to save a couple of MB of heap.
This commit is contained in:
parent
ca098e63b9
commit
47650a4314
@ -200,6 +200,11 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
|
||||
final int docCount = metaIn.readVInt();
|
||||
BytesRef minTerm = readBytesRef(metaIn);
|
||||
BytesRef maxTerm = readBytesRef(metaIn);
|
||||
if (numTerms == 1) {
|
||||
assert maxTerm.equals(minTerm);
|
||||
// save heap for edge case of a single term only so min == max
|
||||
maxTerm = minTerm;
|
||||
}
|
||||
if (docCount < 0
|
||||
|| docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
|
||||
throw new CorruptIndexException(
|
||||
|
@ -598,8 +598,6 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
||||
private final ByteBuffersDataOutput scratchBytes = ByteBuffersDataOutput.newResettableInstance();
|
||||
private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
|
||||
|
||||
static final BytesRef EMPTY_BYTES_REF = new BytesRef();
|
||||
|
||||
private static class StatsWriter {
|
||||
|
||||
private final DataOutput out;
|
||||
|
Loading…
x
Reference in New Issue
Block a user