mirror of https://github.com/apache/lucene.git
LUCENE-8593: Specialize single value numeric DV updates
The case when all values are the same on a numeric field update is common for soft_deletes. With the new infrastructure for buffering DV updates we can gain an easy win by specializing the applied updates if all values are the same.
This commit is contained in:
parent
38cfd0e259
commit
b4e1fe4393
|
@ -425,4 +425,52 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
||||||
return hasValue;
|
return hasValue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Base class for doc-values field updates in which every updated document
 * receives the same value. Subclasses supply that one value through
 * {@link #longValue()} or {@link #binaryValue()}; the {@code add} overrides
 * below forward only the doc id to the parent class.
 */
static abstract class SingleValueDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
||||||
|
|
||||||
|
/** Passes all arguments straight through to the {@code DocValuesFieldUpdates} constructor. */
protected SingleValueDocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
|
||||||
|
super(maxDoc, delGen, field, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Registers {@code doc} as updated. The passed value is not stored; with
 * assertions enabled it is checked to equal {@link #longValue()}.
 */
@Override
|
||||||
|
void add(int doc, long value) {
|
||||||
|
// sanity check only: the value is fixed by the subclass, callers must not pass a different one
assert longValue() == value;
|
||||||
|
// NOTE(review): super.add(int) is defined outside this view; presumably it records just the doc id - confirm
super.add(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Registers {@code doc} as updated. The passed value is not stored; with
 * assertions enabled it is checked to equal {@link #binaryValue()}.
 */
@Override
|
||||||
|
void add(int doc, BytesRef value) {
|
||||||
|
// sanity check only: the value is fixed by the subclass, callers must not pass a different one
assert binaryValue().equals(value);
|
||||||
|
super.add(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Not supported by this class.
 *
 * @throws UnsupportedOperationException always
 */
@Override
|
||||||
|
void add(int docId, Iterator iterator) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the binary value shared by all updates; implemented by subclasses. */
protected abstract BytesRef binaryValue();
|
||||||
|
|
||||||
|
/** Returns the numeric value shared by all updates; implemented by subclasses. */
protected abstract long longValue();
|
||||||
|
|
||||||
|
/**
 * Returns an {@code AbstractIterator} built from this instance's {@code size},
 * {@code docs} and {@code delGen}. Its value accessors delegate to the enclosing
 * instance, so they return the same value regardless of iterator position.
 */
@Override
|
||||||
|
Iterator iterator() {
|
||||||
|
return new DocValuesFieldUpdates.AbstractIterator(size, docs, delGen) {
|
||||||
|
@Override
|
||||||
|
protected void set(long idx) {
|
||||||
|
// nothing to do; the value accessors below ignore the index
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
long longValue() {
|
||||||
|
// delegate to the enclosing updates object, not to this iterator
return SingleValueDocValuesFieldUpdates.this.longValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
BytesRef binaryValue() {
|
||||||
|
// delegate to the enclosing updates object, not to this iterator
return SingleValueDocValuesFieldUpdates.this.binaryValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -183,6 +183,18 @@ final class FieldUpdatesBuffer {
|
||||||
return isNumeric;
|
return isNumeric;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns {@code true} if this buffer is numeric and its {@code numericValues}
 * array has exactly one entry.
 * NOTE(review): presumably the array stays at length one for as long as every
 * buffered update carries the same value - confirm against addUpdate.
 */
boolean hasSingleValue() {
|
||||||
|
// we only do this optimization for numerics so far.
|
||||||
|
return isNumeric && numericValues.length == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the numeric value buffered for the update at position {@code idx}.
 *
 * @param idx position of the update in this buffer
 * @return {@code 0} if {@code hasValues} is non-null and reports no value at
 *         {@code idx}; otherwise the entry of {@code numericValues} selected
 *         by {@code getArrayIndex}
 */
long getNumericValue(int idx) {
|
||||||
|
// a null hasValues skips this check entirely; presumably it means every update has a value - TODO confirm
if (hasValues != null && hasValues.get(idx) == false) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// NOTE(review): getArrayIndex is defined outside this view; presumably it maps idx onto a possibly shorter array - confirm
return numericValues[getArrayIndex(numericValues.length, idx)];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Struct like class that is used to iterate over all updates in this buffer
|
* Struct like class that is used to iterate over all updates in this buffer
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -484,9 +484,10 @@ final class FrozenBufferedUpdates {
|
||||||
long updateCount = 0;
|
long updateCount = 0;
|
||||||
|
|
||||||
// We first write all our updates private, and only in the end publish to the ReadersAndUpdates */
|
// We first write all our updates private, and only in the end publish to the ReadersAndUpdates */
|
||||||
Map<String, DocValuesFieldUpdates> holder = new HashMap<>();
|
final List<DocValuesFieldUpdates> resolvedUpdates = new ArrayList<>();
|
||||||
for (Map.Entry<String, FieldUpdatesBuffer> fieldUpdate : updates.entrySet()) {
|
for (Map.Entry<String, FieldUpdatesBuffer> fieldUpdate : updates.entrySet()) {
|
||||||
String updateField = fieldUpdate.getKey();
|
String updateField = fieldUpdate.getKey();
|
||||||
|
DocValuesFieldUpdates dvUpdates = null;
|
||||||
FieldUpdatesBuffer value = fieldUpdate.getValue();
|
FieldUpdatesBuffer value = fieldUpdate.getValue();
|
||||||
boolean isNumeric = value.isNumeric();
|
boolean isNumeric = value.isNumeric();
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
|
||||||
|
@ -534,14 +535,19 @@ final class FrozenBufferedUpdates {
|
||||||
if (termsEnum.seekExact(bufferedUpdate.termValue)) {
|
if (termsEnum.seekExact(bufferedUpdate.termValue)) {
|
||||||
// we don't need term frequencies for this
|
// we don't need term frequencies for this
|
||||||
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
|
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
|
||||||
DocValuesFieldUpdates dvUpdates = holder.get(updateField);
|
|
||||||
if (dvUpdates == null) {
|
if (dvUpdates == null) {
|
||||||
if (isNumeric) {
|
if (isNumeric) {
|
||||||
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
|
if (value.hasSingleValue()) {
|
||||||
|
dvUpdates = new NumericDocValuesFieldUpdates
|
||||||
|
.SingleValueNumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc(),
|
||||||
|
value.getNumericValue(0));
|
||||||
|
} else {
|
||||||
|
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
|
dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
|
||||||
}
|
}
|
||||||
holder.put(updateField, dvUpdates);
|
resolvedUpdates.add(dvUpdates);
|
||||||
}
|
}
|
||||||
final IntConsumer docIdConsumer;
|
final IntConsumer docIdConsumer;
|
||||||
final DocValuesFieldUpdates update = dvUpdates;
|
final DocValuesFieldUpdates update = dvUpdates;
|
||||||
|
@ -582,7 +588,7 @@ final class FrozenBufferedUpdates {
|
||||||
}
|
}
|
||||||
|
|
||||||
// now freeze & publish:
|
// now freeze & publish:
|
||||||
for (DocValuesFieldUpdates update : holder.values()) {
|
for (DocValuesFieldUpdates update : resolvedUpdates) {
|
||||||
if (update.any()) {
|
if (update.any()) {
|
||||||
update.finish();
|
update.finish();
|
||||||
segState.rld.addDVUpdate(update);
|
segState.rld.addDVUpdate(update);
|
||||||
|
|
|
@ -111,4 +111,24 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
||||||
+ Long.BYTES
|
+ Long.BYTES
|
||||||
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF;
|
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Numeric specialization of {@code SingleValueDocValuesFieldUpdates}: holds one
 * {@code long} that is reported as the value for every update. Binary access
 * is not supported.
 */
static class SingleValueNumericDocValuesFieldUpdates extends SingleValueDocValuesFieldUpdates {
|
||||||
|
|
||||||
|
// the single value returned by longValue()
private final long value;
|
||||||
|
|
||||||
|
/**
 * Creates updates of type {@code DocValuesType.NUMERIC} that all carry {@code value}.
 */
SingleValueNumericDocValuesFieldUpdates(long delGen, String field, int maxDoc, long value) {
|
||||||
|
// note the argument order: the super constructor takes maxDoc first, this constructor takes delGen first
super(maxDoc, delGen, field, DocValuesType.NUMERIC);
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Not supported by this numeric implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
|
||||||
|
protected BytesRef binaryValue() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the value passed to the constructor. */
@Override
|
||||||
|
protected long longValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,9 +36,13 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6);
|
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6);
|
||||||
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(counter, update, 15);
|
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(counter, update, 15);
|
||||||
buffer.addUpdate(new Term("id", "10"), 6, 15);
|
buffer.addUpdate(new Term("id", "10"), 6, 15);
|
||||||
|
assertTrue(buffer.hasSingleValue());
|
||||||
buffer.addUpdate(new Term("id", "8"), 12, 15);
|
buffer.addUpdate(new Term("id", "8"), 12, 15);
|
||||||
|
assertFalse(buffer.hasSingleValue());
|
||||||
buffer.addUpdate(new Term("some_other_field", "8"), 13, 17);
|
buffer.addUpdate(new Term("some_other_field", "8"), 13, 17);
|
||||||
|
assertFalse(buffer.hasSingleValue());
|
||||||
buffer.addUpdate(new Term("id", "8"), 12, 16);
|
buffer.addUpdate(new Term("id", "8"), 12, 16);
|
||||||
|
assertFalse(buffer.hasSingleValue());
|
||||||
assertTrue(buffer.isNumeric());
|
assertTrue(buffer.isNumeric());
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
|
FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
|
||||||
|
@ -214,14 +218,17 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while ((value = iterator.next()) != null) {
|
while ((value = iterator.next()) != null) {
|
||||||
|
long v = buffer.getNumericValue(count);
|
||||||
randomUpdate = updates.get(count++);
|
randomUpdate = updates.get(count++);
|
||||||
assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
|
assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
|
||||||
assertEquals(randomUpdate.term.field, value.termField);
|
assertEquals(randomUpdate.term.field, value.termField);
|
||||||
assertEquals(randomUpdate.hasValue, value.hasValue);
|
assertEquals(randomUpdate.hasValue, value.hasValue);
|
||||||
if (randomUpdate.hasValue) {
|
if (randomUpdate.hasValue) {
|
||||||
assertEquals(randomUpdate.getValue(), value.numericValue);
|
assertEquals(randomUpdate.getValue(), value.numericValue);
|
||||||
|
assertEquals(v, value.numericValue);
|
||||||
} else {
|
} else {
|
||||||
assertEquals(0, value.numericValue);
|
assertEquals(0, value.numericValue);
|
||||||
|
assertEquals(0, v);
|
||||||
}
|
}
|
||||||
assertEquals(randomUpdate.docIDUpto, value.docUpTo);
|
assertEquals(randomUpdate.docIDUpto, value.docUpTo);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue