mirror of https://github.com/apache/lucene.git
LUCENE-9816: lazy-init LZ4-HC hashtable in BlockTreeTermsWriter
The LZ4-HC hash table is heavy (a 128 KB int[] plus a 128 KB short[]) and must be filled with special values on initialization. This is a lot of overhead for fields that might not use compression at all. Don't initialize it for a field until we see hints that the data might be compressible and we actually need the table in order to test compression out.
This commit is contained in:
parent
96eb043131
commit
dade99cb4d
|
@ -917,6 +917,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// it out if the
|
||||
// average suffix length is greater than 6.
|
||||
if (suffixWriter.length() > 6L * numEntries) {
|
||||
if (compressionHashTable == null) {
|
||||
compressionHashTable = new LZ4.HighCompressionHashTable();
|
||||
}
|
||||
LZ4.compress(
|
||||
suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable);
|
||||
if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) {
|
||||
|
@ -1139,8 +1142,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
private byte[] spareBytes = BytesRef.EMPTY_BYTES;
|
||||
private final LZ4.HighCompressionHashTable compressionHashTable =
|
||||
new LZ4.HighCompressionHashTable();
|
||||
private LZ4.HighCompressionHashTable compressionHashTable;
|
||||
}
|
||||
|
||||
private boolean closed;
|
||||
|
|
Loading…
Reference in New Issue