mirror of https://github.com/apache/lucene.git
LUCENE-8599: Use sparse bitset to store docs in SingleValueDocValuesFieldUpdates
Using a sparse bitset in SingleValueDocValuesFieldUdpates allows storing which documents have an update much more efficient and prevents the need to sort the docs array altogether that showed to be a significant bottleneck in LUCENE-8598. Using the spares bitset yields another 10x performance improvement in applying updates versus the changes proposed in LUCENE-8598.
This commit is contained in:
parent
fdd9b55df0
commit
ef61b547b1
|
@ -271,6 +271,9 @@ Optimizations
|
|||
* LUCENE-8598: Moved to the default accepted overhead ratio for packet ints in DocValuesFieldUpdats
|
||||
yields an up-to 4x performance improvement when applying doc values updates. (Simon Willnauer, Adrien Grand)
|
||||
|
||||
* LUCENE-8599: Use sparse bitset to store docs in SingleValueDocValuesFieldUpdates.
|
||||
(Simon Willnauer, Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-8573: BKDWriter now uses FutureArrays#mismatch to compute shared prefixes.
|
||||
|
|
|
@ -18,10 +18,13 @@ package org.apache.lucene.index;
|
|||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.SparseFixedBitSet;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PagedMutable;
|
||||
|
||||
|
@ -282,7 +285,6 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
throw new IllegalStateException("already finished");
|
||||
}
|
||||
finished = true;
|
||||
|
||||
// shrink wrap
|
||||
if (size < docs.size()) {
|
||||
resize(size);
|
||||
|
@ -304,7 +306,7 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
}
|
||||
|
||||
/** Returns true if this instance contains any updates. */
|
||||
synchronized final boolean any() {
|
||||
synchronized boolean any() {
|
||||
return size > 0;
|
||||
}
|
||||
|
||||
|
@ -316,7 +318,7 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
* Adds an update that resets the documents value.
|
||||
* @param doc the doc to update
|
||||
*/
|
||||
final synchronized void reset(int doc) {
|
||||
synchronized void reset(int doc) {
|
||||
addInternal(doc, HAS_NO_VALUE_MASK);
|
||||
}
|
||||
final synchronized int add(int doc) {
|
||||
|
@ -431,21 +433,42 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
}
|
||||
|
||||
static abstract class SingleValueDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
||||
|
||||
private final BitSet bitSet;
|
||||
private BitSet hasNoValue;
|
||||
private boolean hasAtLeastOneValue;
|
||||
protected SingleValueDocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
|
||||
super(maxDoc, delGen, field, type);
|
||||
this.bitSet = new SparseFixedBitSet(maxDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
void add(int doc, long value) {
|
||||
assert longValue() == value;
|
||||
super.add(doc);
|
||||
bitSet.set(doc);
|
||||
this.hasAtLeastOneValue = true;
|
||||
if (hasNoValue != null) {
|
||||
hasNoValue.clear(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
void add(int doc, BytesRef value) {
|
||||
assert binaryValue().equals(value);
|
||||
super.add(doc);
|
||||
bitSet.set(doc);
|
||||
this.hasAtLeastOneValue = true;
|
||||
if (hasNoValue != null) {
|
||||
hasNoValue.clear(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
synchronized void reset(int doc) {
|
||||
bitSet.set(doc);
|
||||
this.hasAtLeastOneValue = true;
|
||||
if (hasNoValue == null) {
|
||||
hasNoValue = new SparseFixedBitSet(maxDoc);
|
||||
}
|
||||
hasNoValue.set(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -458,11 +481,28 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
protected abstract long longValue();
|
||||
|
||||
@Override
|
||||
Iterator iterator() {
|
||||
return new DocValuesFieldUpdates.AbstractIterator(size, docs, delGen) {
|
||||
synchronized boolean any() {
|
||||
return super.any() || hasAtLeastOneValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void set(long idx) {
|
||||
// nothing to do;
|
||||
public long ramBytesUsed() {
|
||||
return super.ramBytesUsed() + bitSet.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
Iterator iterator() {
|
||||
BitSetIterator iterator = new BitSetIterator(bitSet, maxDoc);
|
||||
return new DocValuesFieldUpdates.Iterator() {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return iterator.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
return iterator.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -474,6 +514,19 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
BytesRef binaryValue() {
|
||||
return SingleValueDocValuesFieldUpdates.this.binaryValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
long delGen() {
|
||||
return delGen;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasValue() {
|
||||
if (hasNoValue != null) {
|
||||
return hasNoValue.get(docID()) == false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.NumericDocValuesFieldUpdates.SingleValueNumericDocValuesFieldUpdates;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
@ -129,7 +130,63 @@ public class TestDocValuesFieldUpdates extends LuceneTestCase {
|
|||
}
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testSharedValueUpdates() {
|
||||
int delGen = random().nextInt();
|
||||
int maxDoc = 1 + random().nextInt(1000);
|
||||
long value = random().nextLong();
|
||||
SingleValueNumericDocValuesFieldUpdates update = new SingleValueNumericDocValuesFieldUpdates(delGen, "foo", maxDoc, value);
|
||||
assertEquals(value, update.longValue());
|
||||
Boolean[] values = new Boolean[maxDoc];
|
||||
boolean any = false;
|
||||
boolean noReset = random().nextBoolean(); // sometimes don't reset
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
if (random().nextBoolean()) {
|
||||
values[i] = Boolean.TRUE;
|
||||
any = true;
|
||||
update.add(i, value);
|
||||
} else if (random().nextBoolean() && noReset == false) {
|
||||
values[i] = null;
|
||||
any = true;
|
||||
update.reset(i);
|
||||
} else {
|
||||
values[i] = Boolean.FALSE;
|
||||
}
|
||||
}
|
||||
if (noReset == false) {
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
if (rarely()) {
|
||||
if (values[i] == null) {
|
||||
values[i] = Boolean.TRUE;
|
||||
update.add(i, value);
|
||||
} else if (values[i]) {
|
||||
values[i] = null;
|
||||
update.reset(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
update.finish();
|
||||
DocValuesFieldUpdates.Iterator iterator = update.iterator();
|
||||
assertEquals(any, update.any());
|
||||
assertEquals(delGen, iterator.delGen());
|
||||
int index = 0;
|
||||
while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int doc = iterator.docID();
|
||||
if (index < iterator.docID()) {
|
||||
for (;index < doc; index++) {
|
||||
assertFalse(values[index]);
|
||||
}
|
||||
}
|
||||
if (index == doc) {
|
||||
if (values[index++] == null) {
|
||||
assertFalse(iterator.hasValue());
|
||||
} else {
|
||||
assertTrue(iterator.hasValue());
|
||||
assertEquals(value, iterator.longValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue