Refactor IndexedDISI to avoid method call when advancing by small gaps.

This commit is contained in:
Adrien Grand 2019-01-25 10:21:48 +01:00
parent ef47582fd5
commit 23bd97bae7
1 changed files with 34 additions and 35 deletions

View File

@ -254,13 +254,15 @@ final class IndexedDISI extends DocIdSetIterator {
return (short)blockCount;
}
// Members are pkg-private to avoid synthetic accessors when accessed from the `Method` enum
/** The slice that stores the {@link DocIdSetIterator}. */
private final IndexInput slice;
private final int jumpTableEntryCount;
private final byte denseRankPower;
private final RandomAccessInput jumpTable; // Skip blocks of 64K bits
private final byte[] denseRankTable;
private final long cost;
final IndexInput slice;
final int jumpTableEntryCount;
final byte denseRankPower;
final RandomAccessInput jumpTable; // Skip blocks of 64K bits
final byte[] denseRankTable;
final long cost;
/**
* This constructor always creates a new blockSlice and a new jumpTable from in, to ensure that operations are
@ -344,28 +346,28 @@ final class IndexedDISI extends DocIdSetIterator {
}
}
private int block = -1;
private long blockEnd;
private long denseBitmapOffset = -1; // Only used for DENSE blocks
private int nextBlockIndex = -1;
int block = -1;
long blockEnd;
long denseBitmapOffset = -1; // Only used for DENSE blocks
int nextBlockIndex = -1;
Method method;
private int doc = -1;
private int index = -1;
int doc = -1;
int index = -1;
// SPARSE variables
private boolean exists;
boolean exists;
// DENSE variables
private long word;
private int wordIndex = -1;
long word;
int wordIndex = -1;
// number of one bits encountered so far, including those of `word`
private int numberOfOnes;
int numberOfOnes;
// Used with rank for jumps inside of DENSE as they are absolute instead of relative
private int denseOrigoIndex;
int denseOrigoIndex;
// ALL variables
private int gap;
int gap;
@Override
public int docID() {
@ -514,7 +516,11 @@ final class IndexedDISI extends DocIdSetIterator {
final int targetWordIndex = targetInBlock >>> 6;
// If possible, skip ahead using the rank cache
rankSkip(disi, target);
// If the distance between the current position and the target is < rank-longs
// there is no sense in using rank
if (disi.denseRankPower != -1 && targetWordIndex - disi.wordIndex >= (1 << (disi.denseRankPower-6) )) {
rankSkip(disi, targetInBlock);
}
for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
disi.word = disi.slice.readLong();
@ -550,7 +556,12 @@ final class IndexedDISI extends DocIdSetIterator {
final int targetInBlock = target & 0xFFFF;
final int targetWordIndex = targetInBlock >>> 6;
rankSkip(disi, target);
// If possible, skip ahead using the rank cache
// If the distance between the current position and the target is < rank-longs
// there is no sense in using rank
if (disi.denseRankPower != -1 && targetWordIndex - disi.wordIndex >= (1 << (disi.denseRankPower-6) )) {
rankSkip(disi, targetInBlock);
}
for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
disi.word = disi.slice.readLong();
@ -594,23 +605,11 @@ final class IndexedDISI extends DocIdSetIterator {
* Note: This does not guarantee a skip up to target, only up to nearest rank boundary. It is the
* responsibility of the caller to iterate further to reach target.
* @param disi standard DISI.
* @param target the wanted docID for which to calculate set-flag and index.
* @param targetInBlock lower 16 bits of the target
* @throws IOException if a DISI seek failed.
*/
private static void rankSkip(IndexedDISI disi, int target) throws IOException {
if (disi.denseRankPower == -1) { // No rank for the current structure
return;
}
final int targetInBlock = target & 0xFFFF; // Lower 16 bits
final int targetWordIndex = targetInBlock >>> 6; // long: 2^6 = 64
// If the distance between the current position and the target is < rank-longs
// there is no sense in using rank
if (targetWordIndex - disi.wordIndex < (1 << (disi.denseRankPower-6) )) {
return;
}
private static void rankSkip(IndexedDISI disi, int targetInBlock) throws IOException {
assert disi.denseRankPower >= 0 : disi.denseRankPower;
// Resolve the rank as close to targetInBlock as possible (maximum distance is 8 longs)
// Note: rankOrigoOffset is tracked on block open, so it is absolute (e.g. don't add origo)
final int rankIndex = targetInBlock >> disi.denseRankPower; // Default is 9 (8 longs: 2^3 * 2^6 = 512 docIDs)