Speed up IndexedDISI Sparse #AdvanceExactWithinBlock for tiny step advance (#12324)

This commit is contained in:
gf2121 2023-06-13 14:24:26 +08:00 committed by GitHub
parent c8e05c8cd6
commit 30eba6df56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 4 deletions

View File

@@ -173,6 +173,8 @@ Improvements
Optimizations Optimizations
--------------------- ---------------------
* GITHUB#12324: Speed up sparse block advanceExact with tiny step in IndexedDISI. (Guo Feng)
* GITHUB#12270 Don't generate stacktrace in CollectionTerminatedException. (Armin Braun) * GITHUB#12270 Don't generate stacktrace in CollectionTerminatedException. (Armin Braun)
* GITHUB#12160: Concurrent rewrite for AbstractKnnVectorQuery. (Kaival Parikh) * GITHUB#12160: Concurrent rewrite for AbstractKnnVectorQuery. (Kaival Parikh)

View File

@@ -110,12 +110,12 @@ public final class IndexedDISI extends DocIdSetIterator {
private static void flush( private static void flush(
int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out)
throws IOException { throws IOException {
assert block >= 0 && block < 65536; assert block >= 0 && block < BLOCK_SIZE;
out.writeShort((short) block); out.writeShort((short) block);
assert cardinality > 0 && cardinality <= 65536; assert cardinality > 0 && cardinality <= BLOCK_SIZE;
out.writeShort((short) (cardinality - 1)); out.writeShort((short) (cardinality - 1));
if (cardinality > MAX_ARRAY_LENGTH) { if (cardinality > MAX_ARRAY_LENGTH) {
if (cardinality != 65536) { // all docs are set if (cardinality != BLOCK_SIZE) { // all docs are set
if (denseRankPower != -1) { if (denseRankPower != -1) {
final byte[] rank = createRank(buffer, denseRankPower); final byte[] rank = createRank(buffer, denseRankPower);
out.writeBytes(rank, rank.length); out.writeBytes(rank, rank.length);
@@ -418,6 +418,7 @@ public final class IndexedDISI extends DocIdSetIterator {
// SPARSE variables // SPARSE variables
boolean exists; boolean exists;
int nextExistDocInBlock = -1;
// DENSE variables // DENSE variables
long word; long word;
@@ -495,7 +496,8 @@ public final class IndexedDISI extends DocIdSetIterator {
if (numValues <= MAX_ARRAY_LENGTH) { if (numValues <= MAX_ARRAY_LENGTH) {
method = Method.SPARSE; method = Method.SPARSE;
blockEnd = slice.getFilePointer() + (numValues << 1); blockEnd = slice.getFilePointer() + (numValues << 1);
} else if (numValues == 65536) { nextExistDocInBlock = -1;
} else if (numValues == BLOCK_SIZE) {
method = Method.ALL; method = Method.ALL;
blockEnd = slice.getFilePointer(); blockEnd = slice.getFilePointer();
gap = block - index - 1; gap = block - index - 1;
@@ -550,6 +552,7 @@ public final class IndexedDISI extends DocIdSetIterator {
if (doc >= targetInBlock) { if (doc >= targetInBlock) {
disi.doc = disi.block | doc; disi.doc = disi.block | doc;
disi.exists = true; disi.exists = true;
disi.nextExistDocInBlock = doc;
return true; return true;
} }
} }
@@ -560,6 +563,10 @@ public final class IndexedDISI extends DocIdSetIterator {
boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException { boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException {
final int targetInBlock = target & 0xFFFF; final int targetInBlock = target & 0xFFFF;
// TODO: binary search // TODO: binary search
if (disi.nextExistDocInBlock > targetInBlock) {
assert !disi.exists;
return false;
}
if (target == disi.doc) { if (target == disi.doc) {
return disi.exists; return disi.exists;
} }
@@ -567,6 +574,7 @@ public final class IndexedDISI extends DocIdSetIterator {
int doc = Short.toUnsignedInt(disi.slice.readShort()); int doc = Short.toUnsignedInt(disi.slice.readShort());
disi.index++; disi.index++;
if (doc >= targetInBlock) { if (doc >= targetInBlock) {
disi.nextExistDocInBlock = doc;
if (doc != targetInBlock) { if (doc != targetInBlock) {
disi.index--; disi.index--;
disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES); disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES);