Optimize computing number of levels in MultiLevelSkipListWriter#bufferSkip (#12653)

* Optimize computing number of levels in MultiLevelSkipListWriter#bufferSkip

* CHANGES.txt entry
This commit is contained in:
Shubham Chaudhary 2023-10-21 17:22:28 +05:30 committed by GitHub
parent 90f8bac9f7
commit de8ae1de7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 15 deletions

View File

@@ -209,6 +209,8 @@ Optimizations
* GITHUB#12651: Use 2d array for OnHeapHnswGraph representation. (Patrick Zhai)
* GITHUB#12653: Optimize computing number of levels in MultiLevelSkipListWriter#bufferSkip. (Shubham Chaudhary)
Changes in runtime behavior
---------------------

View File

@@ -63,25 +63,24 @@ public abstract class MultiLevelSkipListWriter {
/** for every skip level a different buffer is used */
private ByteBuffersDataOutput[] skipBuffer;
/** Length of the window at which the skips are placed on skip level 1 */
private final int windowLength;
/** Creates a {@code MultiLevelSkipListWriter}. */
protected MultiLevelSkipListWriter(
int skipInterval, int skipMultiplier, int maxSkipLevels, int df) {
this.skipInterval = skipInterval;
this.skipMultiplier = skipMultiplier;
int numberOfSkipLevels;
// calculate the maximum number of skip levels for this document frequency
if (df <= skipInterval) {
numberOfSkipLevels = 1;
if (df > skipInterval) {
// also make sure it does not exceed maxSkipLevels
this.numberOfSkipLevels =
Math.min(1 + MathUtil.log(df / skipInterval, skipMultiplier), maxSkipLevels);
} else {
numberOfSkipLevels = 1 + MathUtil.log(df / skipInterval, skipMultiplier);
this.numberOfSkipLevels = 1;
}
// make sure it does not exceed maxSkipLevels
if (numberOfSkipLevels > maxSkipLevels) {
numberOfSkipLevels = maxSkipLevels;
}
this.numberOfSkipLevels = numberOfSkipLevels;
this.windowLength = Math.toIntExact(skipInterval * (long) skipMultiplier);
}
/**
@@ -130,13 +129,17 @@ public abstract class MultiLevelSkipListWriter {
assert df % skipInterval == 0;
int numLevels = 1;
df /= skipInterval;
// This optimizes the most common case, i.e. numLevels = 1: a single modulo check is enough
// to catch that case.
if (df % windowLength == 0) {
numLevels++;
df /= windowLength;
// determine max level
while ((df % skipMultiplier) == 0 && numLevels < numberOfSkipLevels) {
numLevels++;
df /= skipMultiplier;
}
}
long childPointer = 0;