mirror of https://github.com/apache/lucene.git
Tune the amount of memory that is allocated to sorting postings upon flushing. (#12011)
When flushing segments that have an index sort configured, postings lists are loaded into arrays and reordered according to the index sort. This reordering is implemented with `TimSorter`, a variant of merge sort. Like merge sort, an important part of `TimSorter` is merging two contiguous sorted slices of the array into a single sorted slice. This merge can be done either with external memory, which is the classical approach, or in place, which still runs in linear time but with a much higher constant factor.

Until now we allocated a fixed budget of `maxDoc/64` temporary slots for doing these merges with external memory; if that was not enough, sorted slices were merged in place. I've been looking at some profiles recently for an index where a non-negligible chunk of the time was spent on in-place merges, so I would like to propose the following change:

- Increase the maximum RAM budget to `maxDoc / 8`. This should avoid in-place merges for all postings lists up to `docFreq = maxDoc / 4`.
- Allocate this RAM budget lazily, rather than eagerly as is done today. This avoids O(maxDoc) allocations for fields like primary keys that only have a couple of postings per term.

Overall memory usage would never be more than 50% higher than it is today, because `TimSorter` never needs more than X temporary slots unless the postings list has at least 2*X entries, and those 2*X entries already get loaded into memory today. And for fields that have short postings lists, memory usage should actually be lower.
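The pattern in a nutshell: subclass `TimSorter`, declare the larger merge budget in the constructor, and only allocate the temporary buffer once `save()` is actually called for a merge. Below is a minimal, self-contained sketch of that pattern; the class name and the plain-`int[]` setup are illustrative, not the actual `DocFreqSorter`/`DocOffsetSorter` from the diff below.

```java
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.TimSorter;

// Hypothetical, simplified sorter: the constructor only declares the
// budget; the temp buffer is allocated the first time TimSorter
// actually asks us to save a slice for an external-memory merge.
final class LazyDocSorter extends TimSorter {
  private int[] docs;
  private int[] tmp = IntsRef.EMPTY_INTS; // grows on demand in save()

  LazyDocSorter(int maxDoc) {
    super(maxDoc / 8); // max temp slots TimSorter may request, not an allocation
  }

  void reset(int[] docs) {
    this.docs = docs;
  }

  @Override
  protected int compare(int i, int j) {
    return Integer.compare(docs[i], docs[j]);
  }

  @Override
  protected void swap(int i, int j) {
    int tmpDoc = docs[i];
    docs[i] = docs[j];
    docs[j] = tmpDoc;
  }

  @Override
  protected void copy(int src, int dest) {
    docs[dest] = docs[src];
  }

  @Override
  protected void save(int i, int len) {
    if (tmp.length < len) {
      // lazily (re)allocate, sized to the slice actually being merged
      tmp = new int[ArrayUtil.oversize(len, Integer.BYTES)];
    }
    System.arraycopy(docs, i, tmp, 0, len);
  }

  @Override
  protected void restore(int i, int j) {
    docs[j] = tmp[i];
  }

  @Override
  protected int compareSaved(int i, int j) {
    return Integer.compare(tmp[i], docs[j]);
  }
}
```

With this shape, a field whose postings lists are short never calls `save()` with a large `len`, so it never pays for the full `maxDoc / 8` budget.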
parent f5ea0412eb
commit ddd63d2da3
lucene/CHANGES.txt
@@ -250,8 +250,12 @@ Optimizations
 
 * GITHUB#12006: Do ints compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries. (Guo Feng)
 
+* GITHUB#12011: Minor speedup to flushing long postings lists when an index
+  sort is configured. (Adrien Grand)
+
 * GITHUB#12017: Aggressive count in BooleanWeight. (Lu Xugang)
 
 Other
 ---------------------
 
lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -34,6 +34,8 @@ import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IntBlockPool;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.TimSorter;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 
@@ -228,12 +230,12 @@ final class FreqProxTermsWriter extends TermsHash {
 
     private int[] docs;
     private int[] freqs;
-    private final int[] tmpDocs;
+    private int[] tmpDocs;
     private int[] tmpFreqs;
 
     DocFreqSorter(int maxDoc) {
-      super(maxDoc / 64);
-      this.tmpDocs = new int[maxDoc / 64];
+      super(maxDoc / 8);
+      this.tmpDocs = IntsRef.EMPTY_INTS;
     }
 
     public void reset(int[] docs, int[] freqs) {
@@ -272,6 +274,12 @@ final class FreqProxTermsWriter extends TermsHash {
 
     @Override
     protected void save(int i, int len) {
+      if (tmpDocs.length < len) {
+        tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
+        if (freqs != null) {
+          tmpFreqs = new int[tmpDocs.length];
+        }
+      }
       System.arraycopy(docs, i, tmpDocs, 0, len);
       if (freqs != null) {
         System.arraycopy(freqs, i, tmpFreqs, 0, len);
@@ -423,13 +431,13 @@ final class FreqProxTermsWriter extends TermsHash {
 
     private int[] docs;
     private long[] offsets;
-    private final int[] tmpDocs;
-    private final long[] tmpOffsets;
+    private int[] tmpDocs;
+    private long[] tmpOffsets;
 
     public DocOffsetSorter(int maxDoc) {
-      super(maxDoc / 64);
-      this.tmpDocs = new int[maxDoc / 64];
-      this.tmpOffsets = new long[maxDoc / 64];
+      super(maxDoc / 8);
+      this.tmpDocs = IntsRef.EMPTY_INTS;
+      this.tmpOffsets = LongsRef.EMPTY_LONGS;
     }
 
     public void reset(int[] docs, long[] offsets) {
@@ -461,6 +469,10 @@ final class FreqProxTermsWriter extends TermsHash {
 
     @Override
     protected void save(int i, int len) {
+      if (tmpDocs.length < len) {
+        tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
+        tmpOffsets = new long[tmpDocs.length];
+      }
       System.arraycopy(docs, i, tmpDocs, 0, len);
       System.arraycopy(offsets, i, tmpOffsets, 0, len);
     }
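For a concrete sense of the trade-off, here is some back-of-the-envelope arithmetic; the `maxDoc = 1_000_000` figure is illustrative, not from the commit:

```java
// Illustrative arithmetic only; maxDoc = 1_000_000 is an assumed value.
public class MergeBudgetMath {
  public static void main(String[] args) {
    int maxDoc = 1_000_000;
    int oldBudget = maxDoc / 64; // 15_625 temp slots, allocated eagerly
    int newBudget = maxDoc / 8;  // 125_000 temp slots, allocated lazily
    // TimSorter saves at most the smaller of the two runs being merged,
    // so a budget of X temp slots covers external-memory merges for
    // postings lists of up to 2 * X entries: maxDoc / 4 with the new budget.
    System.out.printf("old budget: %d slots, new budget: %d slots%n",
        oldBudget, newBudget);
    System.out.printf("in-place merges avoided up to docFreq = %d%n",
        2 * newBudget);
  }
}
```

Since the 2*X entries of such a postings list are already held in memory during the sort, the lazily allocated X temp slots add at most 50% on top, matching the bound stated in the commit message.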