LUCENE-8598: Improve field updates packed values

DocValuesFieldUpdates uses compact settings for packed ints, which causes
dramatic slowdowns when the updates are finished and sorted. Moving to the default
accepted overhead ratio yields up to 4x improvements in applying updates. This change
also improves the packing of numeric values since we know the value range in advance and
can choose a different packing scheme in such a case.
Overall this change yields a good performance improvement: 99% of the time spent applying
DV field updates is spent in the sort method, so speeding up the sort essentially makes
applying the updates 4x faster.
This commit is contained in:
Simon Willnauer 2018-12-09 19:13:20 +01:00
parent 202dad8c88
commit 0650f995f9
7 changed files with 67 additions and 15 deletions

View File

@ -266,7 +266,10 @@ Optimizations
* LUCENE-8590: BufferedUpdates now uses an optimized storage for buffering docvalues updates that
can save up to 80% of the heap used compared to the previous implementation and uses non-object
based datastructures. (Simon Willnauer, Mike McCandless, Shai Erera, Adrien Grant)
based datastructures. (Simon Willnauer, Mike McCandless, Shai Erera, Adrien Grand)
* LUCENE-8598: Moving to the default accepted overhead ratio for packed ints in DocValuesFieldUpdates
yields an up-to 4x performance improvement when applying doc values updates. (Simon Willnauer, Adrien Grand)
Other

View File

@ -251,7 +251,7 @@ abstract class DocValuesFieldUpdates implements Accountable {
}
this.type = type;
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
}
final boolean getFinished() {

View File

@ -55,6 +55,8 @@ final class FieldUpdatesBuffer {
private int[] docsUpTo;
private long[] numericValues; // this will be null if we are buffering binaries
private FixedBitSet hasValues;
private long maxNumeric = Long.MIN_VALUE;
private long minNumeric = Long.MAX_VALUE;
private String[] fields;
private final boolean isNumeric;
@ -82,6 +84,7 @@ final class FieldUpdatesBuffer {
this(bytesUsed, initialValue, docUpTo, true);
if (initialValue.hasValue()) {
numericValues = new long[] {initialValue.getValue()};
maxNumeric = minNumeric = initialValue.getValue();
} else {
numericValues = new long[] {0};
}
@ -95,6 +98,22 @@ final class FieldUpdatesBuffer {
}
}
/**
 * Returns the largest numeric value tracked by this buffer, or {@code 0} when both
 * trackers still hold their initial sentinels (i.e. no value has been recorded).
 */
long getMaxNumeric() {
  assert isNumeric;
  // maxNumeric starts at Long.MIN_VALUE and minNumeric at Long.MAX_VALUE; seeing both
  // sentinels together means no value ever updated them
  final boolean nothingRecorded = maxNumeric == Long.MIN_VALUE && minNumeric == Long.MAX_VALUE;
  return nothingRecorded ? 0 : maxNumeric;
}
/**
 * Returns the smallest numeric value tracked by this buffer, or {@code 0} when both
 * trackers still hold their initial sentinels (i.e. no value has been recorded).
 */
long getMinNumeric() {
  assert isNumeric;
  // minNumeric starts at Long.MAX_VALUE and maxNumeric at Long.MIN_VALUE; seeing both
  // sentinels together means no value ever updated them
  final boolean nothingRecorded = maxNumeric == Long.MIN_VALUE && minNumeric == Long.MAX_VALUE;
  return nothingRecorded ? 0 : minNumeric;
}
void add(String field, int docUpTo, int ord, boolean hasValue) {
if (fields[0].equals(field) == false || fields.length != 1 ) {
if (fields.length <= ord) {
@ -144,6 +163,8 @@ final class FieldUpdatesBuffer {
final int ord = append(term);
String field = term.field;
add(field, docUpTo, ord, true);
minNumeric = Math.min(minNumeric, value);
maxNumeric = Math.max(maxNumeric, value);
if (numericValues[0] != value || numericValues.length != 1) {
if (numericValues.length <= ord) {
long[] array = ArrayUtil.grow(numericValues, ord+1);

View File

@ -542,7 +542,8 @@ final class FrozenBufferedUpdates {
.SingleValueNumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc(),
value.getNumericValue(0));
} else {
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, value.getMinNumeric(),
value.getMaxNumeric(), segState.reader.maxDoc());
}
} else {
dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AbstractPagedMutable;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;
import org.apache.lucene.util.packed.PagedMutable;
@ -31,15 +32,16 @@ import org.apache.lucene.util.packed.PagedMutable;
* @lucene.experimental
*/
final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
// TODO: can't this just be NumericDocValues now? avoid boxing the long value...
final static class Iterator extends DocValuesFieldUpdates.AbstractIterator {
private final PagedGrowableWriter values;
private final AbstractPagedMutable values;
private final long minValue;
private long value;
Iterator(int size, PagedGrowableWriter values, PagedMutable docs, long delGen) {
Iterator(int size, long minValue, AbstractPagedMutable values, PagedMutable docs, long delGen) {
super(size, docs, delGen);
this.values = values;
this.minValue = minValue;
}
@Override
long longValue() {
@ -53,14 +55,25 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
@Override
protected void set(long idx) {
value = values.get(idx);
value = values.get(idx) + minValue;
}
}
private PagedGrowableWriter values;
private AbstractPagedMutable values;
private final long minValue;
public NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
super(maxDoc, delGen, field, DocValuesType.NUMERIC);
values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
// we don't know the min/max range so we use the growable writer here to adjust as we go.
values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.DEFAULT);
minValue = 0;
}
/**
 * Creates numeric doc-values updates for values whose range is known up front.
 * The backing storage is a fixed-width {@link PagedMutable} sized for
 * {@code maxValue - minValue} rather than a growable writer.
 *
 * @param delGen   passed through to the superclass
 * @param field    passed through to the superclass
 * @param minValue lower bound of the value range; kept as the offset base for stored values
 * @param maxValue upper bound of the value range; must be >= {@code minValue}
 * @param maxDoc   passed through to the superclass
 */
NumericDocValuesFieldUpdates(long delGen, String field, long minValue, long maxValue, int maxDoc) {
  super(maxDoc, delGen, field, DocValuesType.NUMERIC);
  assert minValue <= maxValue : "minValue must be <= maxValue [" + minValue + " > " + maxValue + "]";
  this.minValue = minValue;
  // only the width of the range has to fit; the difference is treated as unsigned
  final long range = maxValue - minValue;
  this.values = new PagedMutable(1, PAGE_SIZE, PackedInts.unsignedBitsRequired(range), PackedInts.DEFAULT);
}
@Override
void add(int doc, BytesRef value) {
@ -75,7 +88,7 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
@Override
synchronized void add(int doc, long value) {
int add = add(doc);
values.set(add, value);
values.set(add, value-minValue);
}
@Override
@ -101,7 +114,7 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
@Override
Iterator iterator() {
ensureFinished();
return new Iterator(size, values, docs, delGen);
return new Iterator(size, minValue, values, docs, delGen);
}
@Override

View File

@ -29,7 +29,7 @@ import org.apache.lucene.util.RamUsageEstimator;
* Base implementation for {@link PagedMutable} and {@link PagedGrowableWriter}.
* @lucene.internal
*/
abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues implements Accountable {
public abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues implements Accountable {
static final int MIN_BLOCK_SIZE = 1 << 6;
static final int MAX_BLOCK_SIZE = 1 << 30;
@ -161,5 +161,4 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends L
public final String toString() {
  // renders as SimpleClassName(size=<size>,pageSize=<pageSize>)
  final StringBuilder description = new StringBuilder(getClass().getSimpleName());
  description.append("(size=").append(size());
  description.append(",pageSize=").append(pageSize());
  return description.append(")").toString();
}
}

View File

@ -30,7 +30,7 @@ import org.apache.lucene.util.TestUtil;
public class TestFieldUpdatesBuffer extends LuceneTestCase {
public void testBascis() throws IOException {
public void testBasics() throws IOException {
Counter counter = Counter.newCounter();
DocValuesUpdate.NumericDocValuesUpdate update =
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6);
@ -44,6 +44,8 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
buffer.addUpdate(new Term("id", "8"), 12, 16);
assertFalse(buffer.hasSingleValue());
assertTrue(buffer.isNumeric());
assertEquals(13, buffer.getMaxNumeric());
assertEquals(6, buffer.getMinNumeric());
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
assertNotNull(value);
@ -217,8 +219,12 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
FieldUpdatesBuffer.BufferedUpdate value;
int count = 0;
long min = Long.MAX_VALUE;
long max = Long.MIN_VALUE;
while ((value = iterator.next()) != null) {
long v = buffer.getNumericValue(count);
min = Math.min(min, v);
max = Math.max(max, v);
randomUpdate = updates.get(count++);
assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
assertEquals(randomUpdate.term.field, value.termField);
@ -232,7 +238,16 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
}
assertEquals(randomUpdate.docIDUpto, value.docUpTo);
}
assertEquals(max, buffer.getMaxNumeric());
assertEquals(min, buffer.getMinNumeric());
assertEquals(count, updates.size());
}
/** A buffer created from an update built with a {@code null} value reports 0 for both bounds. */
public void testNoNumericValue() {
  final DocValuesUpdate.NumericDocValuesUpdate noValueUpdate =
      new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", null);
  final Counter bytesUsed = Counter.newCounter();
  final FieldUpdatesBuffer buffer =
      new FieldUpdatesBuffer(bytesUsed, noValueUpdate, noValueUpdate.docIDUpto);
  assertEquals(0, buffer.getMinNumeric());
  assertEquals(0, buffer.getMaxNumeric());
}
}