mirror of https://github.com/apache/lucene.git
LUCENE-8598: Improve field updates packed values
DocValuesFieldUpdates uses the compact setting for packed ints, which causes dramatic slowdowns when the updates are finished and sorted. Moving to the default accepted overhead ratio yields up to 4x improvements in applying updates. This change also improves the packing of numeric values, since we know the value range in advance and can choose a different packing scheme in that case. Overall this change yields a good performance improvement: 99% of the time spent applying DV field updates is spent in the sort method, so this essentially makes applying the updates 4x faster.
This commit is contained in:
parent
202dad8c88
commit
0650f995f9
|
@ -266,7 +266,10 @@ Optimizations
|
|||
|
||||
* LUCENE-8590: BufferedUpdates now uses an optimized storage for buffering docvalues updates that
|
||||
can save up to 80% of the heap used compared to the previous implementation and uses non-object
|
||||
based datastructures. (Simon Willnauer, Mike McCandless, Shai Erera, Adrien Grant)
|
||||
based datastructures. (Simon Willnauer, Mike McCandless, Shai Erera, Adrien Grand)
|
||||
|
||||
* LUCENE-8598: Moving to the default accepted overhead ratio for packed ints in DocValuesFieldUpdates
|
||||
yields an up-to 4x performance improvement when applying doc values updates. (Simon Willnauer, Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
|
|
|
@ -251,7 +251,7 @@ abstract class DocValuesFieldUpdates implements Accountable {
|
|||
}
|
||||
this.type = type;
|
||||
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
|
||||
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
|
||||
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
|
||||
}
|
||||
|
||||
final boolean getFinished() {
|
||||
|
|
|
@ -55,6 +55,8 @@ final class FieldUpdatesBuffer {
|
|||
private int[] docsUpTo;
|
||||
private long[] numericValues; // this will be null if we are buffering binaries
|
||||
private FixedBitSet hasValues;
|
||||
private long maxNumeric = Long.MIN_VALUE;
|
||||
private long minNumeric = Long.MAX_VALUE;
|
||||
private String[] fields;
|
||||
private final boolean isNumeric;
|
||||
|
||||
|
@ -82,6 +84,7 @@ final class FieldUpdatesBuffer {
|
|||
this(bytesUsed, initialValue, docUpTo, true);
|
||||
if (initialValue.hasValue()) {
|
||||
numericValues = new long[] {initialValue.getValue()};
|
||||
maxNumeric = minNumeric = initialValue.getValue();
|
||||
} else {
|
||||
numericValues = new long[] {0};
|
||||
}
|
||||
|
@ -95,6 +98,22 @@ final class FieldUpdatesBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
long getMaxNumeric() {
|
||||
assert isNumeric;
|
||||
if (minNumeric == Long.MAX_VALUE && maxNumeric == Long.MIN_VALUE) {
|
||||
return 0; // we don't have any value;
|
||||
}
|
||||
return maxNumeric;
|
||||
}
|
||||
|
||||
long getMinNumeric() {
|
||||
assert isNumeric;
|
||||
if (minNumeric == Long.MAX_VALUE && maxNumeric == Long.MIN_VALUE) {
|
||||
return 0; // we don't have any value
|
||||
}
|
||||
return minNumeric;
|
||||
}
|
||||
|
||||
void add(String field, int docUpTo, int ord, boolean hasValue) {
|
||||
if (fields[0].equals(field) == false || fields.length != 1 ) {
|
||||
if (fields.length <= ord) {
|
||||
|
@ -144,6 +163,8 @@ final class FieldUpdatesBuffer {
|
|||
final int ord = append(term);
|
||||
String field = term.field;
|
||||
add(field, docUpTo, ord, true);
|
||||
minNumeric = Math.min(minNumeric, value);
|
||||
maxNumeric = Math.max(maxNumeric, value);
|
||||
if (numericValues[0] != value || numericValues.length != 1) {
|
||||
if (numericValues.length <= ord) {
|
||||
long[] array = ArrayUtil.grow(numericValues, ord+1);
|
||||
|
|
|
@ -542,7 +542,8 @@ final class FrozenBufferedUpdates {
|
|||
.SingleValueNumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc(),
|
||||
value.getNumericValue(0));
|
||||
} else {
|
||||
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
|
||||
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, value.getMinNumeric(),
|
||||
value.getMaxNumeric(), segState.reader.maxDoc());
|
||||
}
|
||||
} else {
|
||||
dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AbstractPagedMutable;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PagedGrowableWriter;
|
||||
import org.apache.lucene.util.packed.PagedMutable;
|
||||
|
@ -31,15 +32,16 @@ import org.apache.lucene.util.packed.PagedMutable;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
||||
|
||||
// TODO: can't this just be NumericDocValues now? avoid boxing the long value...
|
||||
final static class Iterator extends DocValuesFieldUpdates.AbstractIterator {
|
||||
private final PagedGrowableWriter values;
|
||||
private final AbstractPagedMutable values;
|
||||
private final long minValue;
|
||||
private long value;
|
||||
|
||||
Iterator(int size, PagedGrowableWriter values, PagedMutable docs, long delGen) {
|
||||
Iterator(int size, long minValue, AbstractPagedMutable values, PagedMutable docs, long delGen) {
|
||||
super(size, docs, delGen);
|
||||
this.values = values;
|
||||
this.minValue = minValue;
|
||||
}
|
||||
@Override
|
||||
long longValue() {
|
||||
|
@ -53,14 +55,25 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
|
||||
@Override
|
||||
protected void set(long idx) {
|
||||
value = values.get(idx);
|
||||
value = values.get(idx) + minValue;
|
||||
}
|
||||
}
|
||||
private PagedGrowableWriter values;
|
||||
private AbstractPagedMutable values;
|
||||
private final long minValue;
|
||||
|
||||
public NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
|
||||
NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
|
||||
super(maxDoc, delGen, field, DocValuesType.NUMERIC);
|
||||
values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
|
||||
// we don't know the min/max range so we use the growable writer here to adjust as we go.
|
||||
values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.DEFAULT);
|
||||
minValue = 0;
|
||||
}
|
||||
|
||||
NumericDocValuesFieldUpdates(long delGen, String field, long minValue, long maxValue, int maxDoc) {
|
||||
super(maxDoc, delGen, field, DocValuesType.NUMERIC);
|
||||
assert minValue <= maxValue : "minValue must be <= maxValue [" + minValue + " > " + maxValue + "]";
|
||||
int bitsPerValue = PackedInts.unsignedBitsRequired(maxValue - minValue);
|
||||
values = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
|
||||
this.minValue = minValue;
|
||||
}
|
||||
@Override
|
||||
void add(int doc, BytesRef value) {
|
||||
|
@ -75,7 +88,7 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
@Override
|
||||
synchronized void add(int doc, long value) {
|
||||
int add = add(doc);
|
||||
values.set(add, value);
|
||||
values.set(add, value-minValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -101,7 +114,7 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
@Override
|
||||
Iterator iterator() {
|
||||
ensureFinished();
|
||||
return new Iterator(size, values, docs, delGen);
|
||||
return new Iterator(size, minValue, values, docs, delGen);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* Base implementation for {@link PagedMutable} and {@link PagedGrowableWriter}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues implements Accountable {
|
||||
public abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues implements Accountable {
|
||||
|
||||
static final int MIN_BLOCK_SIZE = 1 << 6;
|
||||
static final int MAX_BLOCK_SIZE = 1 << 30;
|
||||
|
@ -161,5 +161,4 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends L
|
|||
public final String toString() {
|
||||
return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
|
||||
public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||
|
||||
public void testBascis() throws IOException {
|
||||
public void testBasics() throws IOException {
|
||||
Counter counter = Counter.newCounter();
|
||||
DocValuesUpdate.NumericDocValuesUpdate update =
|
||||
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6);
|
||||
|
@ -44,6 +44,8 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
|||
buffer.addUpdate(new Term("id", "8"), 12, 16);
|
||||
assertFalse(buffer.hasSingleValue());
|
||||
assertTrue(buffer.isNumeric());
|
||||
assertEquals(13, buffer.getMaxNumeric());
|
||||
assertEquals(6, buffer.getMinNumeric());
|
||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||
FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
|
||||
assertNotNull(value);
|
||||
|
@ -217,8 +219,12 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
|||
FieldUpdatesBuffer.BufferedUpdate value;
|
||||
|
||||
int count = 0;
|
||||
long min = Long.MAX_VALUE;
|
||||
long max = Long.MIN_VALUE;
|
||||
while ((value = iterator.next()) != null) {
|
||||
long v = buffer.getNumericValue(count);
|
||||
min = Math.min(min, v);
|
||||
max = Math.max(max, v);
|
||||
randomUpdate = updates.get(count++);
|
||||
assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
|
||||
assertEquals(randomUpdate.term.field, value.termField);
|
||||
|
@ -232,7 +238,16 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
|||
}
|
||||
assertEquals(randomUpdate.docIDUpto, value.docUpTo);
|
||||
}
|
||||
assertEquals(max, buffer.getMaxNumeric());
|
||||
assertEquals(min, buffer.getMinNumeric());
|
||||
assertEquals(count, updates.size());
|
||||
}
|
||||
|
||||
public void testNoNumericValue() {
|
||||
DocValuesUpdate.NumericDocValuesUpdate update =
|
||||
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", null);
|
||||
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(Counter.newCounter(), update, update.docIDUpto);
|
||||
assertEquals(0, buffer.getMinNumeric());
|
||||
assertEquals(0, buffer.getMaxNumeric());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue