mirror of https://github.com/apache/lucene.git
LUCENE-9228: Sort dvUpdates by terms before apply
With this change, we sort dvUpdates in term order before applying them if they all update a single field to the same value. This optimization can reduce the flush time by around 20% for docValues update use cases.
This commit is contained in:
parent
d5e51bf994
commit
83ccb8d2a2
|
@ -72,6 +72,10 @@ Improvements
|
||||||
* LUCENE-9194: Simplify XYShapeXQuery API by adding a new abstract class called XYGeometry. Queries are
|
* LUCENE-9194: Simplify XYShapeXQuery API by adding a new abstract class called XYGeometry. Queries are
|
||||||
executed with input objects that extend such interface. (Ignacio Vera)
|
executed with input objects that extend such interface. (Ignacio Vera)
|
||||||
|
|
||||||
|
* LUCENE-9228: Sort dvUpdates in the term order before applying if they all update a
|
||||||
|
single field to the same value. This optimization can reduce the flush time by around
|
||||||
|
20% for the docValues update use cases. (Nhat Nguyen, Adrien Grand, Simon Willnauer)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
@ -51,6 +52,7 @@ final class FieldUpdatesBuffer {
|
||||||
// on CPU for those. We also save on not needing to sort in order to apply the terms in order
|
// on CPU for those. We also save on not needing to sort in order to apply the terms in order
|
||||||
// since by definition we store them in order.
|
// since by definition we store them in order.
|
||||||
private final BytesRefArray termValues;
|
private final BytesRefArray termValues;
|
||||||
|
private BytesRefArray.SortState termSortState;
|
||||||
private final BytesRefArray byteValues; // this will be null if we are buffering numerics
|
private final BytesRefArray byteValues; // this will be null if we are buffering numerics
|
||||||
private int[] docsUpTo;
|
private int[] docsUpTo;
|
||||||
private long[] numericValues; // this will be null if we are buffering binaries
|
private long[] numericValues; // this will be null if we are buffering binaries
|
||||||
|
@ -59,6 +61,7 @@ final class FieldUpdatesBuffer {
|
||||||
private long minNumeric = Long.MAX_VALUE;
|
private long minNumeric = Long.MAX_VALUE;
|
||||||
private String[] fields;
|
private String[] fields;
|
||||||
private final boolean isNumeric;
|
private final boolean isNumeric;
|
||||||
|
private boolean finished = false;
|
||||||
|
|
||||||
private FieldUpdatesBuffer(Counter bytesUsed, DocValuesUpdate initialValue, int docUpTo, boolean isNumeric) {
|
private FieldUpdatesBuffer(Counter bytesUsed, DocValuesUpdate initialValue, int docUpTo, boolean isNumeric) {
|
||||||
this.bytesUsed = bytesUsed;
|
this.bytesUsed = bytesUsed;
|
||||||
|
@ -115,6 +118,7 @@ final class FieldUpdatesBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
void add(String field, int docUpTo, int ord, boolean hasValue) {
|
void add(String field, int docUpTo, int ord, boolean hasValue) {
|
||||||
|
assert finished == false : "buffer was finished already";
|
||||||
if (fields[0].equals(field) == false || fields.length != 1 ) {
|
if (fields[0].equals(field) == false || fields.length != 1 ) {
|
||||||
if (fields.length <= ord) {
|
if (fields.length <= ord) {
|
||||||
String[] array = ArrayUtil.grow(fields, ord+1);
|
String[] array = ArrayUtil.grow(fields, ord+1);
|
||||||
|
@ -195,7 +199,26 @@ final class FieldUpdatesBuffer {
|
||||||
return numUpdates++;
|
return numUpdates++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void finish() {
|
||||||
|
if (finished) {
|
||||||
|
throw new IllegalStateException("buffer was finished already");
|
||||||
|
}
|
||||||
|
finished = true;
|
||||||
|
final boolean sortedTerms = hasSingleValue() && hasValues == null && fields.length == 1;
|
||||||
|
if (sortedTerms) {
|
||||||
|
// sort ascending by term, then descending by docsUpTo, so that we can skip updates with a lower docUpTo.
|
||||||
|
termSortState = termValues.sort(Comparator.naturalOrder(),
|
||||||
|
(i1, i2) -> Integer.compare(
|
||||||
|
docsUpTo[getArrayIndex(docsUpTo.length, i2)],
|
||||||
|
docsUpTo[getArrayIndex(docsUpTo.length, i1)]));
|
||||||
|
bytesUsed.addAndGet(termSortState.ramBytesUsed());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
BufferedUpdateIterator iterator() {
|
BufferedUpdateIterator iterator() {
|
||||||
|
if (finished == false) {
|
||||||
|
throw new IllegalStateException("buffer is not finished yet");
|
||||||
|
}
|
||||||
return new BufferedUpdateIterator();
|
return new BufferedUpdateIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -264,26 +287,36 @@ final class FieldUpdatesBuffer {
|
||||||
* An iterator that iterates over all updates in insertion order
|
* An iterator that iterates over all updates in insertion order
|
||||||
*/
|
*/
|
||||||
class BufferedUpdateIterator {
|
class BufferedUpdateIterator {
|
||||||
private final BytesRefIterator termValuesIterator;
|
private final BytesRefArray.IndexedBytesRefIterator termValuesIterator;
|
||||||
|
private final BytesRefArray.IndexedBytesRefIterator lookAheadTermIterator;
|
||||||
private final BytesRefIterator byteValuesIterator;
|
private final BytesRefIterator byteValuesIterator;
|
||||||
private final BufferedUpdate bufferedUpdate = new BufferedUpdate();
|
private final BufferedUpdate bufferedUpdate = new BufferedUpdate();
|
||||||
private final Bits updatesWithValue;
|
private final Bits updatesWithValue;
|
||||||
private int index = 0;
|
|
||||||
|
|
||||||
BufferedUpdateIterator() {
|
BufferedUpdateIterator() {
|
||||||
this.termValuesIterator = termValues.iterator();
|
this.termValuesIterator = termValues.iterator(termSortState);
|
||||||
|
this.lookAheadTermIterator = termSortState != null ? termValues.iterator(termSortState) : null;
|
||||||
this.byteValuesIterator = isNumeric ? null : byteValues.iterator();
|
this.byteValuesIterator = isNumeric ? null : byteValues.iterator();
|
||||||
updatesWithValue = hasValues == null ? new Bits.MatchAllBits(numUpdates) : hasValues;
|
updatesWithValue = hasValues == null ? new Bits.MatchAllBits(numUpdates) : hasValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If all updates update a single field to the same value, then we can apply these
|
||||||
|
* updates in the term order instead of the request order as both will yield the same result.
|
||||||
|
* This optimization allows us to iterate the term dictionary faster and de-duplicate updates.
|
||||||
|
*/
|
||||||
|
boolean isSortedTerms() {
|
||||||
|
return termSortState != null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Moves to the next BufferedUpdate or return null if all updates are consumed.
|
* Moves to the next BufferedUpdate or return null if all updates are consumed.
|
||||||
* The returned instance is a shared instance and must be fully consumed before the next call to this method.
|
* The returned instance is a shared instance and must be fully consumed before the next call to this method.
|
||||||
*/
|
*/
|
||||||
BufferedUpdate next() throws IOException {
|
BufferedUpdate next() throws IOException {
|
||||||
BytesRef next = termValuesIterator.next();
|
BytesRef next = nextTerm();
|
||||||
if (next != null) {
|
if (next != null) {
|
||||||
final int idx = index++;
|
final int idx = termValuesIterator.ord();
|
||||||
bufferedUpdate.termValue = next;
|
bufferedUpdate.termValue = next;
|
||||||
bufferedUpdate.hasValue = updatesWithValue.get(idx);
|
bufferedUpdate.hasValue = updatesWithValue.get(idx);
|
||||||
bufferedUpdate.termField = fields[getArrayIndex(fields.length, idx)];
|
bufferedUpdate.termField = fields[getArrayIndex(fields.length, idx)];
|
||||||
|
@ -304,6 +337,20 @@ final class FieldUpdatesBuffer {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BytesRef nextTerm() throws IOException {
|
||||||
|
if (lookAheadTermIterator != null) {
|
||||||
|
final BytesRef lastTerm = bufferedUpdate.termValue;
|
||||||
|
BytesRef lookAheadTerm;
|
||||||
|
while ((lookAheadTerm = lookAheadTermIterator.next()) != null && lookAheadTerm.equals(lastTerm)) {
|
||||||
|
BytesRef discardedTerm = termValuesIterator.next(); // discard as the docUpTo of the previous update is higher
|
||||||
|
assert discardedTerm.equals(lookAheadTerm) : "[" + discardedTerm + "] != [" + lookAheadTerm + "]";
|
||||||
|
assert docsUpTo[getArrayIndex(docsUpTo.length, termValuesIterator.ord())] <= bufferedUpdate.docUpTo :
|
||||||
|
docsUpTo[getArrayIndex(docsUpTo.length, termValuesIterator.ord())] + ">" + bufferedUpdate.docUpTo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return termValuesIterator.next();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getArrayIndex(int arrayLength, int index) {
|
private static int getArrayIndex(int arrayLength, int index) {
|
||||||
|
|
|
@ -110,6 +110,7 @@ final class FrozenBufferedUpdates {
|
||||||
// so that it maps to all fields it affects, sorted by their docUpto, and traverse
|
// so that it maps to all fields it affects, sorted by their docUpto, and traverse
|
||||||
// that Term only once, applying the update to all fields that still need to be
|
// that Term only once, applying the update to all fields that still need to be
|
||||||
// updated.
|
// updated.
|
||||||
|
updates.fieldUpdates.values().forEach(FieldUpdatesBuffer::finish);
|
||||||
this.fieldUpdates = Collections.unmodifiableMap(new HashMap<>(updates.fieldUpdates));
|
this.fieldUpdates = Collections.unmodifiableMap(new HashMap<>(updates.fieldUpdates));
|
||||||
this.fieldUpdatesCount = updates.numFieldUpdates.get();
|
this.fieldUpdatesCount = updates.numFieldUpdates.get();
|
||||||
|
|
||||||
|
@ -491,7 +492,7 @@ final class FrozenBufferedUpdates {
|
||||||
boolean isNumeric = value.isNumeric();
|
boolean isNumeric = value.isNumeric();
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate bufferedUpdate;
|
FieldUpdatesBuffer.BufferedUpdate bufferedUpdate;
|
||||||
TermDocsIterator termDocsIterator = new TermDocsIterator(segState.reader, false);
|
TermDocsIterator termDocsIterator = new TermDocsIterator(segState.reader, iterator.isSortedTerms());
|
||||||
while ((bufferedUpdate = iterator.next()) != null) {
|
while ((bufferedUpdate = iterator.next()) != null) {
|
||||||
// TODO: we traverse the terms in update order (not term order) so that we
|
// TODO: we traverse the terms in update order (not term order) so that we
|
||||||
// apply the updates in the correct order, i.e. if two terms update the
|
// apply the updates in the correct order, i.e. if two terms update the
|
||||||
|
@ -521,7 +522,6 @@ final class FrozenBufferedUpdates {
|
||||||
longValue = bufferedUpdate.numericValue;
|
longValue = bufferedUpdate.numericValue;
|
||||||
binaryValue = bufferedUpdate.binaryValue;
|
binaryValue = bufferedUpdate.binaryValue;
|
||||||
}
|
}
|
||||||
termDocsIterator.getDocs();
|
|
||||||
if (dvUpdates == null) {
|
if (dvUpdates == null) {
|
||||||
if (isNumeric) {
|
if (isNumeric) {
|
||||||
if (value.hasSingleValue()) {
|
if (value.hasSingleValue()) {
|
||||||
|
@ -825,7 +825,7 @@ final class FrozenBufferedUpdates {
|
||||||
return null; // requested term does not exist in this segment
|
return null; // requested term does not exist in this segment
|
||||||
} else if (cmp == 0) {
|
} else if (cmp == 0) {
|
||||||
return getDocs();
|
return getDocs();
|
||||||
} else if (cmp > 0) {
|
} else {
|
||||||
TermsEnum.SeekStatus status = termsEnum.seekCeil(term);
|
TermsEnum.SeekStatus status = termsEnum.seekCeil(term);
|
||||||
switch (status) {
|
switch (status) {
|
||||||
case FOUND:
|
case FOUND:
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.util;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.function.IntBinaryOperator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A simple append only random-access {@link BytesRef} array that stores full
|
* A simple append only random-access {@link BytesRef} array that stores full
|
||||||
|
@ -117,7 +118,11 @@ public final class BytesRefArray implements SortableBytesRefArray {
|
||||||
pool.setBytesRef(spare, result, offset, length);
|
pool.setBytesRef(spare, result, offset, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int[] sort(final Comparator<BytesRef> comp) {
|
|
||||||
|
/**
|
||||||
|
* Returns a {@link SortState} representing the order of elements in this array. This is a non-destructive operation.
|
||||||
|
*/
|
||||||
|
public SortState sort(final Comparator<BytesRef> comp, final IntBinaryOperator tieComparator) {
|
||||||
final int[] orderedEntries = new int[size()];
|
final int[] orderedEntries = new int[size()];
|
||||||
for (int i = 0; i < orderedEntries.length; i++) {
|
for (int i = 0; i < orderedEntries.length; i++) {
|
||||||
orderedEntries[i] = i;
|
orderedEntries[i] = i;
|
||||||
|
@ -135,22 +140,28 @@ public final class BytesRefArray implements SortableBytesRefArray {
|
||||||
final int idx1 = orderedEntries[i], idx2 = orderedEntries[j];
|
final int idx1 = orderedEntries[i], idx2 = orderedEntries[j];
|
||||||
setBytesRef(scratch1, scratchBytes1, idx1);
|
setBytesRef(scratch1, scratchBytes1, idx1);
|
||||||
setBytesRef(scratch2, scratchBytes2, idx2);
|
setBytesRef(scratch2, scratchBytes2, idx2);
|
||||||
return comp.compare(scratchBytes1, scratchBytes2);
|
return compare(idx1, scratchBytes1, idx2, scratchBytes2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void setPivot(int i) {
|
protected void setPivot(int i) {
|
||||||
final int index = orderedEntries[i];
|
pivotIndex = orderedEntries[i];
|
||||||
setBytesRef(pivotBuilder, pivot, index);
|
setBytesRef(pivotBuilder, pivot, pivotIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
final int index = orderedEntries[j];
|
final int index = orderedEntries[j];
|
||||||
setBytesRef(scratch2, scratchBytes2, index);
|
setBytesRef(scratch2, scratchBytes2, index);
|
||||||
return comp.compare(pivot, scratchBytes2);
|
return compare(pivotIndex, pivot, index, scratchBytes2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int compare(int i1, BytesRef b1, int i2, BytesRef b2) {
|
||||||
|
int res = comp.compare(b1, b2);
|
||||||
|
return res == 0 ? tieComparator.applyAsInt(i1, i2) : res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int pivotIndex;
|
||||||
private final BytesRef pivot = new BytesRef();
|
private final BytesRef pivot = new BytesRef();
|
||||||
private final BytesRef scratchBytes1 = new BytesRef();
|
private final BytesRef scratchBytes1 = new BytesRef();
|
||||||
private final BytesRef scratchBytes2 = new BytesRef();
|
private final BytesRef scratchBytes2 = new BytesRef();
|
||||||
|
@ -158,14 +169,14 @@ public final class BytesRefArray implements SortableBytesRefArray {
|
||||||
private final BytesRefBuilder scratch1 = new BytesRefBuilder();
|
private final BytesRefBuilder scratch1 = new BytesRefBuilder();
|
||||||
private final BytesRefBuilder scratch2 = new BytesRefBuilder();
|
private final BytesRefBuilder scratch2 = new BytesRefBuilder();
|
||||||
}.sort(0, size());
|
}.sort(0, size());
|
||||||
return orderedEntries;
|
return new SortState(orderedEntries);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sugar for {@link #iterator(Comparator)} with a <code>null</code> comparator
|
* sugar for {@link #iterator(Comparator)} with a <code>null</code> comparator
|
||||||
*/
|
*/
|
||||||
public BytesRefIterator iterator() {
|
public BytesRefIterator iterator() {
|
||||||
return iterator(null);
|
return iterator((SortState) null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -184,20 +195,66 @@ public final class BytesRefArray implements SortableBytesRefArray {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
|
public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
|
||||||
|
return iterator(sort(comp, (i, j) -> 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an {@link IndexedBytesRefIterator} with point in time semantics. The iterator provides access to all
|
||||||
|
* so far appended {@link BytesRef} instances. If a non-null sortState is specified then the iterator will iterate
|
||||||
|
* the byte values in the order of the sortState; otherwise, the order is the same as the values were appended.
|
||||||
|
*/
|
||||||
|
public IndexedBytesRefIterator iterator(final SortState sortState) {
|
||||||
|
final int size = size();
|
||||||
|
final int[] indices = sortState == null ? null : sortState.indices;
|
||||||
|
assert indices == null || indices.length == size : indices.length + " != " + size;
|
||||||
final BytesRefBuilder spare = new BytesRefBuilder();
|
final BytesRefBuilder spare = new BytesRefBuilder();
|
||||||
final BytesRef result = new BytesRef();
|
final BytesRef result = new BytesRef();
|
||||||
final int size = size();
|
|
||||||
final int[] indices = comp == null ? null : sort(comp);
|
return new IndexedBytesRefIterator() {
|
||||||
return new BytesRefIterator() {
|
int pos = -1;
|
||||||
int pos = 0;
|
int ord = 0;
|
||||||
@Override
|
@Override
|
||||||
public BytesRef next() {
|
public BytesRef next() {
|
||||||
|
++pos;
|
||||||
if (pos < size) {
|
if (pos < size) {
|
||||||
setBytesRef(spare, result, indices == null ? pos++ : indices[pos++]);
|
ord = indices == null ? pos : indices[pos];
|
||||||
|
setBytesRef(spare, result, ord);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int ord() {
|
||||||
|
return ord;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to iterate the elements of an array in a given order.
|
||||||
|
*/
|
||||||
|
public final static class SortState implements Accountable {
|
||||||
|
private final int[] indices;
|
||||||
|
|
||||||
|
private SortState(int[] indices) {
|
||||||
|
this.indices = indices;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + indices.length * Integer.BYTES;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An extension of {@link BytesRefIterator} that allows retrieving the index of the current element
|
||||||
|
*/
|
||||||
|
public interface IndexedBytesRefIterator extends BytesRefIterator {
|
||||||
|
/**
|
||||||
|
* Returns the ordinal position of the element that was returned in the latest call of {@link #next()}.
|
||||||
|
* Do not call this method if {@link #next()} has not been called yet or if the last call returned a null value.
|
||||||
|
*/
|
||||||
|
int ord();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,10 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.SortedMap;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -46,6 +49,7 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
assertTrue(buffer.isNumeric());
|
assertTrue(buffer.isNumeric());
|
||||||
assertEquals(13, buffer.getMaxNumeric());
|
assertEquals(13, buffer.getMaxNumeric());
|
||||||
assertEquals(6, buffer.getMinNumeric());
|
assertEquals(6, buffer.getMinNumeric());
|
||||||
|
buffer.finish();
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
|
FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
|
||||||
assertNotNull(value);
|
assertNotNull(value);
|
||||||
|
@ -99,6 +103,7 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
buffer.addNoValue(new Term("id", "3"), Integer.MAX_VALUE);
|
buffer.addNoValue(new Term("id", "3"), Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
buffer.addUpdate(new Term("id", "4"), intValue, Integer.MAX_VALUE);
|
buffer.addUpdate(new Term("id", "4"), intValue, Integer.MAX_VALUE);
|
||||||
|
buffer.finish();
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate value;
|
FieldUpdatesBuffer.BufferedUpdate value;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
@ -131,6 +136,7 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
buffer.addNoValue(new Term("id", "3"), Integer.MAX_VALUE);
|
buffer.addNoValue(new Term("id", "3"), Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
buffer.addUpdate(new Term("id", "4"), new BytesRef(""), Integer.MAX_VALUE);
|
buffer.addUpdate(new Term("id", "4"), new BytesRef(""), Integer.MAX_VALUE);
|
||||||
|
buffer.finish();
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate value;
|
FieldUpdatesBuffer.BufferedUpdate value;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
@ -149,12 +155,20 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
assertFalse(buffer.isNumeric());
|
assertFalse(buffer.isNumeric());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int randomDocUpTo() {
|
||||||
|
if (random().nextInt(5) == 0) {
|
||||||
|
return Integer.MAX_VALUE;
|
||||||
|
} else {
|
||||||
|
return random().nextInt(10000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DocValuesUpdate.BinaryDocValuesUpdate getRandomBinaryUpdate() {
|
DocValuesUpdate.BinaryDocValuesUpdate getRandomBinaryUpdate() {
|
||||||
String termField = RandomPicks.randomFrom(random(), Arrays.asList("id", "_id", "some_other_field"));
|
String termField = RandomPicks.randomFrom(random(), Arrays.asList("id", "_id", "some_other_field"));
|
||||||
String docId = "" + random().nextInt(10);
|
String docId = "" + random().nextInt(10);
|
||||||
DocValuesUpdate.BinaryDocValuesUpdate value = new DocValuesUpdate.BinaryDocValuesUpdate(new Term(termField, docId), "binary",
|
DocValuesUpdate.BinaryDocValuesUpdate value = new DocValuesUpdate.BinaryDocValuesUpdate(new Term(termField, docId), "binary",
|
||||||
rarely() ? null : new BytesRef(TestUtil.randomRealisticUnicodeString(random())));
|
rarely() ? null : new BytesRef(TestUtil.randomRealisticUnicodeString(random())));
|
||||||
return rarely() ? value.prepareForApply(random().nextInt(100)) : value;
|
return rarely() ? value.prepareForApply(randomDocUpTo()) : value;
|
||||||
}
|
}
|
||||||
|
|
||||||
DocValuesUpdate.NumericDocValuesUpdate getRandomNumericUpdate() {
|
DocValuesUpdate.NumericDocValuesUpdate getRandomNumericUpdate() {
|
||||||
|
@ -162,7 +176,7 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
String docId = "" + random().nextInt(10);
|
String docId = "" + random().nextInt(10);
|
||||||
DocValuesUpdate.NumericDocValuesUpdate value = new DocValuesUpdate.NumericDocValuesUpdate(new Term(termField, docId), "numeric",
|
DocValuesUpdate.NumericDocValuesUpdate value = new DocValuesUpdate.NumericDocValuesUpdate(new Term(termField, docId), "numeric",
|
||||||
rarely() ? null : Long.valueOf(random().nextInt(100)));
|
rarely() ? null : Long.valueOf(random().nextInt(100)));
|
||||||
return rarely() ? value.prepareForApply(random().nextInt(100)) : value;
|
return rarely() ? value.prepareForApply(randomDocUpTo()) : value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testBinaryRandom() throws IOException {
|
public void testBinaryRandom() throws IOException {
|
||||||
|
@ -181,6 +195,7 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
buffer.addNoValue(randomUpdate.term, randomUpdate.docIDUpto);
|
buffer.addNoValue(randomUpdate.term, randomUpdate.docIDUpto);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
buffer.finish();
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate value;
|
FieldUpdatesBuffer.BufferedUpdate value;
|
||||||
|
|
||||||
|
@ -216,6 +231,55 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
buffer.addNoValue(randomUpdate.term, randomUpdate.docIDUpto);
|
buffer.addNoValue(randomUpdate.term, randomUpdate.docIDUpto);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
buffer.finish();
|
||||||
|
DocValuesUpdate.NumericDocValuesUpdate lastUpdate = randomUpdate;
|
||||||
|
boolean termsSorted = lastUpdate.hasValue && updates.stream()
|
||||||
|
.allMatch(update -> update.field.equals(lastUpdate.field) &&
|
||||||
|
update.hasValue && update.getValue() == lastUpdate.getValue());
|
||||||
|
assertBufferUpdates(buffer, updates, termsSorted);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNoNumericValue() {
|
||||||
|
DocValuesUpdate.NumericDocValuesUpdate update =
|
||||||
|
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", null);
|
||||||
|
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(Counter.newCounter(), update, update.docIDUpto);
|
||||||
|
assertEquals(0, buffer.getMinNumeric());
|
||||||
|
assertEquals(0, buffer.getMaxNumeric());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortAndDedupNumericUpdatesByTerms() throws IOException {
|
||||||
|
List<DocValuesUpdate.NumericDocValuesUpdate> updates = new ArrayList<>();
|
||||||
|
int numUpdates = 1 + random().nextInt(1000);
|
||||||
|
Counter counter = Counter.newCounter();
|
||||||
|
String termField = RandomPicks.randomFrom(random(), Arrays.asList("id", "_id", "some_other_field"));
|
||||||
|
long docValue = 1 + random().nextInt(1000);
|
||||||
|
DocValuesUpdate.NumericDocValuesUpdate randomUpdate = new DocValuesUpdate.NumericDocValuesUpdate(
|
||||||
|
new Term(termField, Integer.toString(random().nextInt(1000))), "numeric", docValue);
|
||||||
|
randomUpdate = randomUpdate.prepareForApply(randomDocUpTo());
|
||||||
|
updates.add(randomUpdate);
|
||||||
|
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(counter, randomUpdate, randomUpdate.docIDUpto);
|
||||||
|
for (int i = 0; i < numUpdates; i++) {
|
||||||
|
randomUpdate = new DocValuesUpdate.NumericDocValuesUpdate(
|
||||||
|
new Term(termField, Integer.toString(random().nextInt(1000))), "numeric", docValue);
|
||||||
|
randomUpdate = randomUpdate.prepareForApply(randomDocUpTo());
|
||||||
|
updates.add(randomUpdate);
|
||||||
|
buffer.addUpdate(randomUpdate.term, randomUpdate.getValue(), randomUpdate.docIDUpto);
|
||||||
|
}
|
||||||
|
buffer.finish();
|
||||||
|
assertBufferUpdates(buffer, updates, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void assertBufferUpdates(FieldUpdatesBuffer buffer,
|
||||||
|
List<DocValuesUpdate.NumericDocValuesUpdate> updates,
|
||||||
|
boolean termSorted) throws IOException {
|
||||||
|
if (termSorted) {
|
||||||
|
updates.sort(Comparator.comparing(u -> u.term.bytes));
|
||||||
|
SortedMap<BytesRef, DocValuesUpdate.NumericDocValuesUpdate> byTerms = new TreeMap<>();
|
||||||
|
for (DocValuesUpdate.NumericDocValuesUpdate update : updates) {
|
||||||
|
byTerms.compute(update.term.bytes, (k, v) -> v != null && v.docIDUpto >= update.docIDUpto ? v : update);
|
||||||
|
}
|
||||||
|
updates = new ArrayList<>(byTerms.values());
|
||||||
|
}
|
||||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
|
||||||
FieldUpdatesBuffer.BufferedUpdate value;
|
FieldUpdatesBuffer.BufferedUpdate value;
|
||||||
|
|
||||||
|
@ -223,14 +287,15 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
long min = Long.MAX_VALUE;
|
long min = Long.MAX_VALUE;
|
||||||
long max = Long.MIN_VALUE;
|
long max = Long.MIN_VALUE;
|
||||||
boolean hasAtLeastOneValue = false;
|
boolean hasAtLeastOneValue = false;
|
||||||
|
DocValuesUpdate.NumericDocValuesUpdate expectedUpdate;
|
||||||
while ((value = iterator.next()) != null) {
|
while ((value = iterator.next()) != null) {
|
||||||
long v = buffer.getNumericValue(count);
|
long v = buffer.getNumericValue(count);
|
||||||
randomUpdate = updates.get(count++);
|
expectedUpdate = updates.get(count++);
|
||||||
assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
|
assertEquals(expectedUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
|
||||||
assertEquals(randomUpdate.term.field, value.termField);
|
assertEquals(expectedUpdate.term.field, value.termField);
|
||||||
assertEquals(randomUpdate.hasValue, value.hasValue);
|
assertEquals(expectedUpdate.hasValue, value.hasValue);
|
||||||
if (randomUpdate.hasValue) {
|
if (expectedUpdate.hasValue) {
|
||||||
assertEquals(randomUpdate.getValue(), value.numericValue);
|
assertEquals(expectedUpdate.getValue(), value.numericValue);
|
||||||
assertEquals(v, value.numericValue);
|
assertEquals(v, value.numericValue);
|
||||||
min = Math.min(min, v);
|
min = Math.min(min, v);
|
||||||
max = Math.max(max, v);
|
max = Math.max(max, v);
|
||||||
|
@ -239,7 +304,7 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
assertEquals(0, value.numericValue);
|
assertEquals(0, value.numericValue);
|
||||||
assertEquals(0, v);
|
assertEquals(0, v);
|
||||||
}
|
}
|
||||||
assertEquals(randomUpdate.docIDUpto, value.docUpTo);
|
assertEquals(expectedUpdate.docIDUpto, value.docUpTo);
|
||||||
}
|
}
|
||||||
if (hasAtLeastOneValue) {
|
if (hasAtLeastOneValue) {
|
||||||
assertEquals(max, buffer.getMaxNumeric());
|
assertEquals(max, buffer.getMaxNumeric());
|
||||||
|
@ -250,12 +315,4 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
assertEquals(count, updates.size());
|
assertEquals(count, updates.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testNoNumericValue() {
|
|
||||||
DocValuesUpdate.NumericDocValuesUpdate update =
|
|
||||||
new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", null);
|
|
||||||
FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(Counter.newCounter(), update, update.docIDUpto);
|
|
||||||
assertEquals(0, buffer.getMinNumeric());
|
|
||||||
assertEquals(0, buffer.getMaxNumeric());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue