Reduce FST block size for BlockTreeTermsWriter (#12604)

This commit is contained in:
gf2121 2023-10-04 01:58:56 -05:00 committed by GitHub
parent 75da33836b
commit 96052891e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 0 deletions

View File

@ -167,6 +167,9 @@ Optimizations
* GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng)
* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
to reduce GC load during indexing. (Guo Feng)
Changes in runtime behavior
---------------------

View File

@ -52,6 +52,7 @@ import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/* /*
TODO: TODO:
@ -490,10 +491,22 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
} }
} }
long estimateSize = prefix.length;
for (PendingBlock block : blocks) {
if (block.subIndices != null) {
for (FST<BytesRef> subIndex : block.subIndices) {
estimateSize += subIndex.numBytes();
}
}
}
int estimateBitsRequired = PackedInts.bitsRequired(estimateSize);
int pageBits = Math.min(15, Math.max(6, estimateBitsRequired));
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final FSTCompiler<BytesRef> fstCompiler = final FSTCompiler<BytesRef> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs) new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
.shouldShareNonSingletonNodes(false) .shouldShareNonSingletonNodes(false)
.bytesPageBits(pageBits)
.build(); .build();
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix); // System.out.println(" compile index for prefix=" + prefix);

View File

@ -520,6 +520,10 @@ public final class FST<T> implements Accountable {
bytes.finish(); bytes.finish();
} }
/**
 * Reports the current position of this FST's backing {@code bytes} store.
 *
 * <p>NOTE(review): presumably this equals the number of bytes the FST occupies once it has been
 * finished; confirm against the byte store's {@code getPosition()} contract.
 *
 * @return the value of {@code bytes.getPosition()}
 */
public long numBytes() {
  final long position = bytes.getPosition();
  return position;
}
public T getEmptyOutput() { public T getEmptyOutput() {
return emptyOutput; return emptyOutput;
} }