LUCENE-9816: lazy-init LZ4-HC hashtable in BlockTreeTermsWriter

The LZ4-HC hashtable is heavy (a 128 KB int[] plus a 128 KB short[]) and must be
filled with special values on initialization. This is a lot of overhead
for fields that might not use compression at all.

Don't initialize the hashtable for a field until we see hints that the data
might be compressible and actually need the table in order to test that out.
This commit is contained in:
Robert Muir 2021-02-28 17:54:30 -05:00
parent 96eb043131
commit dade99cb4d
No known key found for this signature in database
GPG Key ID: 817AE1DD322D7ECA
1 changed file with 4 additions and 2 deletions

View File

@ -917,6 +917,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// it out if the // it out if the
// average suffix length is greater than 6. // average suffix length is greater than 6.
if (suffixWriter.length() > 6L * numEntries) { if (suffixWriter.length() > 6L * numEntries) {
if (compressionHashTable == null) {
compressionHashTable = new LZ4.HighCompressionHashTable();
}
LZ4.compress( LZ4.compress(
suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable); suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable);
if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) { if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) {
@ -1139,8 +1142,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance();
private byte[] spareBytes = BytesRef.EMPTY_BYTES; private byte[] spareBytes = BytesRef.EMPTY_BYTES;
private final LZ4.HighCompressionHashTable compressionHashTable = private LZ4.HighCompressionHashTable compressionHashTable;
new LZ4.HighCompressionHashTable();
} }
private boolean closed; private boolean closed;