mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
Deduplicate min and max term in single-term FieldReader (#13618)
I noticed that single-term readers are an edge case, but not an uncommon one in Elasticsearch heap dumps. It seems quite common to have a constant value for some field across a complete segment (e.g. a version value that is repeated endlessly in logs). It seems simple enough to deduplicate here to save a couple of MB of heap.
This commit is contained in:
parent
ca098e63b9
commit
47650a4314
@ -200,6 +200,11 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
|
|||||||
final int docCount = metaIn.readVInt();
|
final int docCount = metaIn.readVInt();
|
||||||
BytesRef minTerm = readBytesRef(metaIn);
|
BytesRef minTerm = readBytesRef(metaIn);
|
||||||
BytesRef maxTerm = readBytesRef(metaIn);
|
BytesRef maxTerm = readBytesRef(metaIn);
|
||||||
|
if (numTerms == 1) {
|
||||||
|
assert maxTerm.equals(minTerm);
|
||||||
|
// save heap for edge case of a single term only so min == max
|
||||||
|
maxTerm = minTerm;
|
||||||
|
}
|
||||||
if (docCount < 0
|
if (docCount < 0
|
||||||
|| docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
|
|| docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
|
||||||
throw new CorruptIndexException(
|
throw new CorruptIndexException(
|
||||||
|
@ -598,8 +598,6 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||||||
private final ByteBuffersDataOutput scratchBytes = ByteBuffersDataOutput.newResettableInstance();
|
private final ByteBuffersDataOutput scratchBytes = ByteBuffersDataOutput.newResettableInstance();
|
||||||
private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
|
private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
|
||||||
|
|
||||||
static final BytesRef EMPTY_BYTES_REF = new BytesRef();
|
|
||||||
|
|
||||||
private static class StatsWriter {
|
private static class StatsWriter {
|
||||||
|
|
||||||
private final DataOutput out;
|
private final DataOutput out;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user