mirror of https://github.com/apache/lucene.git
LUCENE-10280: Store BKD blocks with continuous ids more efficiently (#510)
This commit is contained in:
parent
4d48dc87f7
commit
892e324d02
|
@ -52,6 +52,8 @@ Improvements
|
|||
Optimizations
|
||||
---------------------
|
||||
|
||||
* LUCENE-10280: Optimize BKD leaves' doc IDs codec when they are continuous. (Guo Feng)
|
||||
|
||||
* LUCENE-10233: Store BKD leaves' doc IDs as bitset in some cases (typically for low cardinality fields
|
||||
or sorted indices) to speed up addAll. (Guo Feng, Adrien Grand)
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ public class DocBaseBitSetIterator extends DocIdSetIterator {
|
|||
throw new IllegalArgumentException("cost must be >= 0, got " + cost);
|
||||
}
|
||||
if ((docBase & 63) != 0) {
|
||||
throw new IllegalArgumentException("docBase need to be a multiple of 64");
|
||||
throw new IllegalArgumentException("docBase need to be a multiple of 64, got " + docBase);
|
||||
}
|
||||
this.bits = bits;
|
||||
this.length = bits.length() + docBase;
|
||||
|
|
|
@ -44,14 +44,23 @@ class DocIdsWriter {
|
|||
}
|
||||
}
|
||||
|
||||
if (strictlySorted && (docIds[start + count - 1] - docIds[start] + 1) <= (count << 4)) {
|
||||
// Only trigger this optimization when max - min + 1 <= 16 * count in order to avoid expanding
|
||||
// too much storage.
|
||||
int min2max = docIds[start + count - 1] - docIds[start] + 1;
|
||||
if (strictlySorted) {
|
||||
if (min2max == count) {
|
||||
// continuous ids, typically happens when segment is sorted
|
||||
out.writeByte((byte) -2);
|
||||
out.writeVInt(docIds[start]);
|
||||
return;
|
||||
} else if (min2max <= (count << 4)) {
|
||||
assert min2max > count : "min2max: " + min2max + ", count: " + count;
|
||||
// Only trigger bitset optimization when max - min + 1 <= 16 * count in order to avoid
|
||||
// expanding too much storage.
|
||||
// A field with lower cardinality will have higher probability to trigger this optimization.
|
||||
out.writeByte((byte) -1);
|
||||
writeIdsAsBitSet(docIds, start, count, out);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (sorted) {
|
||||
out.writeByte((byte) 0);
|
||||
int previous = 0;
|
||||
|
@ -139,6 +148,9 @@ class DocIdsWriter {
|
|||
static void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
|
||||
final int bpv = in.readByte();
|
||||
switch (bpv) {
|
||||
case -2:
|
||||
readContinuousIds(in, count, docIDs);
|
||||
break;
|
||||
case -1:
|
||||
readBitSet(in, count, docIDs);
|
||||
break;
|
||||
|
@ -165,6 +177,13 @@ class DocIdsWriter {
|
|||
return new DocBaseBitSetIterator(bitSet, count, offsetWords << 6);
|
||||
}
|
||||
|
||||
private static void readContinuousIds(IndexInput in, int count, int[] docIDs) throws IOException {
|
||||
int start = in.readVInt();
|
||||
for (int i = 0; i < count; i++) {
|
||||
docIDs[i] = start + i;
|
||||
}
|
||||
}
|
||||
|
||||
private static void readBitSet(IndexInput in, int count, int[] docIDs) throws IOException {
|
||||
DocIdSetIterator iterator = readBitSetIterator(in, count);
|
||||
int docId, pos = 0;
|
||||
|
@ -215,6 +234,9 @@ class DocIdsWriter {
|
|||
static void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
|
||||
final int bpv = in.readByte();
|
||||
switch (bpv) {
|
||||
case -2:
|
||||
readContinuousIds(in, count, visitor);
|
||||
break;
|
||||
case -1:
|
||||
readBitSet(in, count, visitor);
|
||||
break;
|
||||
|
@ -274,4 +296,15 @@ class DocIdsWriter {
|
|||
DocIdSetIterator bitSetIterator = readBitSetIterator(in, count);
|
||||
visitor.visit(bitSetIterator);
|
||||
}
|
||||
|
||||
private static void readContinuousIds(IndexInput in, int count, IntersectVisitor visitor)
|
||||
throws IOException {
|
||||
int start = in.readVInt();
|
||||
int extra = start & 63;
|
||||
int offset = start - extra;
|
||||
int numBits = count + extra;
|
||||
FixedBitSet bitSet = new FixedBitSet(numBits);
|
||||
bitSet.set(extra, numBits);
|
||||
visitor.visit(new DocBaseBitSetIterator(bitSet, count, offset));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,6 +76,21 @@ public class TestDocIdsWriter extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testContinuousIds() throws Exception {
|
||||
int numIters = atLeast(100);
|
||||
try (Directory dir = newDirectory()) {
|
||||
for (int iter = 0; iter < numIters; ++iter) {
|
||||
int size = 1 + random().nextInt(5000);
|
||||
int[] docIDs = new int[size];
|
||||
int start = random().nextInt(1000000);
|
||||
for (int i = 0; i < docIDs.length; i++) {
|
||||
docIDs[i] = start + i;
|
||||
}
|
||||
test(dir, docIDs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void test(Directory dir, int[] ints) throws Exception {
|
||||
final long len;
|
||||
try (IndexOutput out = dir.createOutput("tmp", IOContext.DEFAULT)) {
|
||||
|
|
Loading…
Reference in New Issue