LUCENE-8593: Specialize single value numeric DV updates

The case when all values are the same on a numeric field update
is common for soft_deletes. With the new infrastructure for buffering
DV updates we can gain an easy win by specializing the applied updates
if all values are the same.
This commit is contained in:
Simon Willnauer 2018-12-05 21:58:30 +01:00
parent 38cfd0e259
commit b4e1fe4393
5 changed files with 98 additions and 5 deletions

View File

@ -425,4 +425,52 @@ abstract class DocValuesFieldUpdates implements Accountable {
return hasValue; return hasValue;
} }
} }
/**
 * Doc-values field updates in which every added document carries the same value.
 * Subclasses provide that value through {@link #longValue()} and {@link #binaryValue()};
 * the {@code add} methods only record the document and assert that the passed value
 * matches the shared one.
 */
static abstract class SingleValueDocValuesFieldUpdates extends DocValuesFieldUpdates {

  protected SingleValueDocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
    super(maxDoc, delGen, field, type);
  }

  /** Returns the shared value in its numeric form. */
  protected abstract long longValue();

  /** Returns the shared value in its binary form. */
  protected abstract BytesRef binaryValue();

  /** Records {@code doc}; with assertions enabled, checks {@code value} against {@link #longValue()}. */
  @Override
  void add(int doc, long value) {
    assert value == longValue();
    super.add(doc);
  }

  /** Records {@code doc}; with assertions enabled, checks {@code value} against {@link #binaryValue()}. */
  @Override
  void add(int doc, BytesRef value) {
    // binaryValue() is deliberately only invoked inside the assert
    assert binaryValue().equals(value);
    super.add(doc);
  }

  /** Not supported by this implementation. */
  @Override
  void add(int docId, Iterator iterator) {
    throw new UnsupportedOperationException();
  }

  /** Returns an iterator whose value accessors always delegate to the enclosing instance. */
  @Override
  Iterator iterator() {
    final Iterator singleValueIterator = new DocValuesFieldUpdates.AbstractIterator(size, docs, delGen) {
      @Override
      BytesRef binaryValue() {
        return SingleValueDocValuesFieldUpdates.this.binaryValue();
      }

      @Override
      long longValue() {
        return SingleValueDocValuesFieldUpdates.this.longValue();
      }

      @Override
      protected void set(long idx) {
        // nothing to do;
      }
    };
    return singleValueIterator;
  }
}
} }

View File

@ -183,6 +183,18 @@ final class FieldUpdatesBuffer {
return isNumeric; return isNumeric;
} }
/**
 * Returns {@code true} if this buffer is numeric and its {@code numericValues}
 * array has exactly one entry.
 */
boolean hasSingleValue() {
  if (isNumeric == false) {
    // we only do this optimization for numerics so far.
    return false;
  }
  return numericValues.length == 1;
}
/**
 * Returns the numeric value stored for the update at position {@code idx}.
 * Yields {@code 0} when {@code hasValues} is present and reports no value for {@code idx}.
 */
long getNumericValue(int idx) {
  final boolean valueMissing = hasValues != null && hasValues.get(idx) == false;
  if (valueMissing) {
    return 0;
  }
  // translate the update position into a slot of the numericValues array
  final int slot = getArrayIndex(numericValues.length, idx);
  return numericValues[slot];
}
/** /**
* Struct like class that is used to iterate over all updates in this buffer * Struct like class that is used to iterate over all updates in this buffer
*/ */

View File

@ -484,9 +484,10 @@ final class FrozenBufferedUpdates {
long updateCount = 0; long updateCount = 0;
// We first write all our updates private, and only in the end publish to the ReadersAndUpdates */ // We first write all our updates private, and only in the end publish to the ReadersAndUpdates */
Map<String, DocValuesFieldUpdates> holder = new HashMap<>(); final List<DocValuesFieldUpdates> resolvedUpdates = new ArrayList<>();
for (Map.Entry<String, FieldUpdatesBuffer> fieldUpdate : updates.entrySet()) { for (Map.Entry<String, FieldUpdatesBuffer> fieldUpdate : updates.entrySet()) {
String updateField = fieldUpdate.getKey(); String updateField = fieldUpdate.getKey();
DocValuesFieldUpdates dvUpdates = null;
FieldUpdatesBuffer value = fieldUpdate.getValue(); FieldUpdatesBuffer value = fieldUpdate.getValue();
boolean isNumeric = value.isNumeric(); boolean isNumeric = value.isNumeric();
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator(); FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
@ -534,14 +535,19 @@ final class FrozenBufferedUpdates {
if (termsEnum.seekExact(bufferedUpdate.termValue)) { if (termsEnum.seekExact(bufferedUpdate.termValue)) {
// we don't need term frequencies for this // we don't need term frequencies for this
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
DocValuesFieldUpdates dvUpdates = holder.get(updateField);
if (dvUpdates == null) { if (dvUpdates == null) {
if (isNumeric) { if (isNumeric) {
if (value.hasSingleValue()) {
dvUpdates = new NumericDocValuesFieldUpdates
.SingleValueNumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc(),
value.getNumericValue(0));
} else {
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc()); dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
}
} else { } else {
dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc()); dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
} }
holder.put(updateField, dvUpdates); resolvedUpdates.add(dvUpdates);
} }
final IntConsumer docIdConsumer; final IntConsumer docIdConsumer;
final DocValuesFieldUpdates update = dvUpdates; final DocValuesFieldUpdates update = dvUpdates;
@ -582,7 +588,7 @@ final class FrozenBufferedUpdates {
} }
// now freeze & publish: // now freeze & publish:
for (DocValuesFieldUpdates update : holder.values()) { for (DocValuesFieldUpdates update : resolvedUpdates) {
if (update.any()) { if (update.any()) {
update.finish(); update.finish();
segState.rld.addDVUpdate(update); segState.rld.addDVUpdate(update);

View File

@ -111,4 +111,24 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
+ Long.BYTES + Long.BYTES
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF; + RamUsageEstimator.NUM_BYTES_OBJECT_REF;
} }
/**
 * Numeric flavour of {@code SingleValueDocValuesFieldUpdates}: holds one fixed
 * {@code long} that is returned by {@link #longValue()}.
 */
static class SingleValueNumericDocValuesFieldUpdates extends SingleValueDocValuesFieldUpdates {

  /** The one value shared by this instance; set once at construction. */
  private final long value;

  SingleValueNumericDocValuesFieldUpdates(long delGen, String field, int maxDoc, long value) {
    super(maxDoc, delGen, field, DocValuesType.NUMERIC);
    this.value = value;
  }

  /** Returns the value passed to the constructor. */
  @Override
  protected long longValue() {
    return value;
  }

  /** Binary access is not supported by this numeric implementation. */
  @Override
  protected BytesRef binaryValue() {
    throw new UnsupportedOperationException();
  }
}
} }

View File

@ -36,9 +36,13 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6); new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6);
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(counter, update, 15); FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(counter, update, 15);
buffer.addUpdate(new Term("id", "10"), 6, 15); buffer.addUpdate(new Term("id", "10"), 6, 15);
assertTrue(buffer.hasSingleValue());
buffer.addUpdate(new Term("id", "8"), 12, 15); buffer.addUpdate(new Term("id", "8"), 12, 15);
assertFalse(buffer.hasSingleValue());
buffer.addUpdate(new Term("some_other_field", "8"), 13, 17); buffer.addUpdate(new Term("some_other_field", "8"), 13, 17);
assertFalse(buffer.hasSingleValue());
buffer.addUpdate(new Term("id", "8"), 12, 16); buffer.addUpdate(new Term("id", "8"), 12, 16);
assertFalse(buffer.hasSingleValue());
assertTrue(buffer.isNumeric()); assertTrue(buffer.isNumeric());
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator(); FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
FieldUpdatesBuffer.BufferedUpdate value = iterator.next(); FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
@ -214,14 +218,17 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
int count = 0; int count = 0;
while ((value = iterator.next()) != null) { while ((value = iterator.next()) != null) {
long v = buffer.getNumericValue(count);
randomUpdate = updates.get(count++); randomUpdate = updates.get(count++);
assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString()); assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
assertEquals(randomUpdate.term.field, value.termField); assertEquals(randomUpdate.term.field, value.termField);
assertEquals(randomUpdate.hasValue, value.hasValue); assertEquals(randomUpdate.hasValue, value.hasValue);
if (randomUpdate.hasValue) { if (randomUpdate.hasValue) {
assertEquals(randomUpdate.getValue(), value.numericValue); assertEquals(randomUpdate.getValue(), value.numericValue);
assertEquals(v, value.numericValue);
} else { } else {
assertEquals(0, value.numericValue); assertEquals(0, value.numericValue);
assertEquals(0, v);
} }
assertEquals(randomUpdate.docIDUpto, value.docUpTo); assertEquals(randomUpdate.docIDUpto, value.docUpTo);
} }