mirror of https://github.com/apache/lucene.git
LUCENE-8282: Reduce boxing and unnecessary object creation in DV updates
DV updates used the boxed type Long to keep the API generic, but the untyped API caused a lot of code duplication, boxing, and unnecessary object creation. This change cuts over to type-safe APIs using BytesRef and long (the primitive). With this change, most of the code that is almost identical between the binary and numeric implementations is now shared, reducing the maintenance overhead and the likelihood of introducing bugs.
This commit is contained in:
parent 570fff8672
commit b43b09190d
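
The heart of the change in one illustrative sketch (not the committed source; the real signatures appear in the diff below): the iterator's single type-erased accessor is split into two type-safe ones, so numeric updates stop boxing into Long and binary updates hand back a reused BytesRef.

    // Before: one generic accessor; numeric callers unbox a Long, binary callers cast to BytesRef.
    abstract static class Iterator {
      abstract Object value();
    }

    // After: type-safe accessors. Each concrete iterator supports exactly one of
    // them and throws UnsupportedOperationException from the other.
    abstract static class Iterator {
      abstract long longValue();       // primitive long, no per-document boxing
      abstract BytesRef binaryValue(); // returns a reused BytesRef instance
    }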
BinaryDocValuesFieldUpdates.java

@@ -33,7 +33,7 @@ import org.apache.lucene.util.packed.PagedMutable;
  *
  * @lucene.experimental
  */
-class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
+final class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {

   final static class Iterator extends DocValuesFieldUpdates.Iterator {
     private final int size;

@@ -55,14 +55,14 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
       value = values.clone();
       this.delGen = delGen;
     }

     @Override
-    BytesRef value() {
+    BytesRef binaryValue() {
       value.offset = offset;
       value.length = length;
       return value;
     }

     @Override
     public int nextDoc() {
       if (idx >= size) {

@@ -94,6 +94,11 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
     long delGen() {
       return delGen;
     }

+    @Override
+    long longValue() {
+      throw new UnsupportedOperationException();
+    }
   }

   private PagedMutable docs;

@@ -116,9 +121,18 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
     return size;
   }

-  // NOTE: we fully consume the incoming BytesRef so caller is free to reuse it after we return:
   @Override
-  synchronized public void add(int doc, Object value) {
+  public void add(int doc, long value) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void add(int docId, DocValuesFieldUpdates.Iterator iterator) {
+    add(docId, iterator.binaryValue());
+  }
+
+  @Override
+  synchronized public void add(int doc, BytesRef value) {
     if (finished) {
       throw new IllegalStateException("already finished");
     }

@@ -130,8 +144,6 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
       throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries");
     }

-    BytesRef val = (BytesRef) value;
-
     // grow the structures to have room for more elements
     if (docs.size() == size) {
       docs = docs.grow(size + 1);

@@ -141,8 +153,8 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {

     docs.set(size, doc);
     offsets.set(size, values.length());
-    lengths.set(size, val.length);
-    values.append(val);
+    lengths.set(size, value.length);
+    values.append(value);
     ++size;
   }

BufferedUpdates.java

@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.IntFunction;

 import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;

@@ -233,62 +234,49 @@ class BufferedUpdates {
     }
   }

-  public void addNumericUpdate(NumericDocValuesUpdate update, int docIDUpto) {
-    LinkedHashMap<Term,NumericDocValuesUpdate> fieldUpdates = numericUpdates.get(update.field);
-    if (fieldUpdates == null) {
-      fieldUpdates = new LinkedHashMap<>();
-      numericUpdates.put(update.field, fieldUpdates);
-      bytesUsed.addAndGet(BYTES_PER_NUMERIC_FIELD_ENTRY);
-    }
-    final NumericDocValuesUpdate current = fieldUpdates.get(update.term);
-    if (current != null && docIDUpto < current.docIDUpto) {
-      // Only record the new number if it's greater than or equal to the current
-      // one. This is important because if multiple threads are replacing the
-      // same doc at nearly the same time, it's possible that one thread that
-      // got a higher docID is scheduled before the other threads.
-      return;
-    }
-
-    update.docIDUpto = docIDUpto;
-    // since it's a LinkedHashMap, we must first remove the Term entry so that
-    // it's added last (we're interested in insertion-order).
-    if (current != null) {
-      fieldUpdates.remove(update.term);
-    }
-    fieldUpdates.put(update.term, update);
-    numNumericUpdates.incrementAndGet();
-    if (current == null) {
-      bytesUsed.addAndGet(BYTES_PER_NUMERIC_UPDATE_ENTRY + update.sizeInBytes());
+  void addNumericUpdate(NumericDocValuesUpdate update, int docIDUpto) {
+    if (addDocValuesUpdate(numericUpdates, update, docIDUpto, update::prepareForApply, BYTES_PER_NUMERIC_UPDATE_ENTRY,
+        BYTES_PER_NUMERIC_FIELD_ENTRY)) {
+      numNumericUpdates.incrementAndGet();
     }
   }

-  public void addBinaryUpdate(BinaryDocValuesUpdate update, int docIDUpto) {
-    LinkedHashMap<Term,BinaryDocValuesUpdate> fieldUpdates = binaryUpdates.get(update.field);
+  void addBinaryUpdate(BinaryDocValuesUpdate update, int docIDUpto) {
+    if (addDocValuesUpdate(binaryUpdates, update, docIDUpto, update::prepareForApply, BYTES_PER_BINARY_UPDATE_ENTRY,
+        BYTES_PER_BINARY_FIELD_ENTRY)) {
+      numBinaryUpdates.incrementAndGet();
+    }
+  }
+
+  private <T extends DocValuesUpdate> boolean addDocValuesUpdate(Map<String,LinkedHashMap<Term,T>> updates, T update,
+                                                                 int docIDUpto, IntFunction<T> prepareForApply,
+                                                                 long bytesPerUpdateEntry, long bytesPerFieldEntry) {
+    LinkedHashMap<Term,T> fieldUpdates = updates.get(update.field);
     if (fieldUpdates == null) {
       fieldUpdates = new LinkedHashMap<>();
-      binaryUpdates.put(update.field, fieldUpdates);
-      bytesUsed.addAndGet(BYTES_PER_BINARY_FIELD_ENTRY);
+      updates.put(update.field, fieldUpdates);
+      bytesUsed.addAndGet(bytesPerFieldEntry);
     }
-    final BinaryDocValuesUpdate current = fieldUpdates.get(update.term);
+    final T current = fieldUpdates.get(update.term);
     if (current != null && docIDUpto < current.docIDUpto) {
       // Only record the new number if it's greater than or equal to the current
       // one. This is important because if multiple threads are replacing the
       // same doc at nearly the same time, it's possible that one thread that
       // got a higher docID is scheduled before the other threads.
-      return;
+      return false;
     }

-    update.docIDUpto = docIDUpto;
     // since it's a LinkedHashMap, we must first remove the Term entry so that
     // it's added last (we're interested in insertion-order).
     if (current != null) {
       fieldUpdates.remove(update.term);
     }
-    fieldUpdates.put(update.term, update);
-    numBinaryUpdates.incrementAndGet();
+    fieldUpdates.put(update.term, prepareForApply.apply(docIDUpto)); // only make a copy if necessary
     if (current == null) {
-      bytesUsed.addAndGet(BYTES_PER_BINARY_UPDATE_ENTRY + update.sizeInBytes());
+      bytesUsed.addAndGet(bytesPerUpdateEntry + update.sizeInBytes());
     }
+    return true;
   }

   void clearDeleteTerms() {

DocValuesFieldUpdates.java

@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.index;

+import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;

@@ -26,7 +27,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
  *
  * @lucene.experimental
  */
-abstract class DocValuesFieldUpdates {
+abstract class DocValuesFieldUpdates implements Accountable {

   protected static final int PAGE_SIZE = 1024;

@@ -51,13 +52,18 @@ abstract class DocValuesFieldUpdates {
       throw new UnsupportedOperationException();
     }

+    @Override
     public abstract int nextDoc(); // no IOException

     /**
-     * Returns the value of the document returned from {@link #nextDoc()}. A
-     * {@code null} value means that it was unset for this document.
+     * Returns a long value for the current document if this iterator is a long iterator.
      */
-    abstract Object value();
+    abstract long longValue();
+
+    /**
+     * Returns a binary value for the current document if this iterator is a binary value iterator.
+     */
+    abstract BytesRef binaryValue();

     /** Returns delGen for this packet. */
     abstract long delGen();

@@ -73,7 +79,7 @@ abstract class DocValuesFieldUpdates {
       }
       @Override
       public BytesRef binaryValue() {
-        return (BytesRef) iterator.value();
+        return iterator.binaryValue();
       }
       @Override
       public boolean advanceExact(int target) {

@@ -100,7 +106,7 @@ abstract class DocValuesFieldUpdates {
     return new NumericDocValues() {
       @Override
       public long longValue() {
-        return ((Long)iterator.value()).longValue();
+        return iterator.longValue();
       }
       @Override
       public boolean advanceExact(int target) {

@@ -163,14 +169,9 @@ abstract class DocValuesFieldUpdates {
     }

     return new Iterator() {
-      private int doc;
+      private int doc = -1;

-      private boolean first = true;
-
       @Override
       public int nextDoc() {
-        // TODO: can we do away with this first boolean?
-        if (first == false) {
         // Advance all sub iterators past current doc
         while (true) {
           if (queue.size() == 0) {

@@ -189,21 +190,22 @@ abstract class DocValuesFieldUpdates {
             queue.updateTop();
           }
         }
-        } else {
-          doc = queue.top().docID();
-          first = false;
-        }
         return doc;
       }

       @Override
       public int docID() {
         return doc;
       }

       @Override
-      public Object value() {
-        return queue.top().value();
+      long longValue() {
+        return queue.top().longValue();
+      }
+
+      @Override
+      BytesRef binaryValue() {
+        return queue.top().binaryValue();
       }

       @Override

@@ -229,31 +231,34 @@ abstract class DocValuesFieldUpdates {
     this.type = type;
   }

-  public boolean getFinished() {
+  boolean getFinished() {
     return finished;
   }

+  abstract void add(int doc, long value);
+
+  abstract void add(int doc, BytesRef value);
+
   /**
-   * Add an update to a document. For unsetting a value you should pass
-   * {@code null}.
+   * Adds the value for the given docID.
+   * This method prevents conditional calls to {@link Iterator#longValue()} or {@link Iterator#binaryValue()}
+   * since the implementation knows if it's a long value iterator or a binary value iterator.
    */
-  public abstract void add(int doc, Object value);
+  abstract void add(int docId, Iterator iterator);

   /**
    * Returns an {@link Iterator} over the updated documents and their
    * values.
    */
   // TODO: also use this for merging, instead of having to write through to disk first
-  public abstract Iterator iterator();
+  abstract Iterator iterator();

   /** Freezes internal data structures and sorts updates by docID for efficient iteration. */
-  public abstract void finish();
+  abstract void finish();

   /** Returns true if this instance contains any updates. */
-  public abstract boolean any();
+  abstract boolean any();

-  /** Returns approximate RAM bytes used. */
-  public abstract long ramBytesUsed();
-
-  public abstract int size();
+  abstract int size();
 }

DocValuesUpdate.java

@@ -16,11 +16,16 @@
  */
 package org.apache.lucene.index;

+import java.io.IOException;
+
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;

 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;

 /** An in-place update to a DocValues field. */

@@ -38,21 +43,22 @@ abstract class DocValuesUpdate {
   final DocValuesType type;
   final Term term;
   final String field;
-  final Object value;
-  int docIDUpto = -1; // unassigned until applied, and confusing that it's here, when it's just used in BufferedDeletes...
+  // used in BufferedDeletes to apply this update only to a slice of docs. It's initialized to BufferedUpdates.MAX_INT
+  // since it's safe and most often used this way we save object creations.
+  final int docIDUpto;

   /**
    * Constructor.
    *
    * @param term the {@link Term} which determines the documents that will be updated
    * @param field the {@link NumericDocValuesField} to update
-   * @param value the updated value
    */
-  protected DocValuesUpdate(DocValuesType type, Term term, String field, Object value) {
+  protected DocValuesUpdate(DocValuesType type, Term term, String field, int docIDUpto) {
+    assert docIDUpto >= 0 : docIDUpto + " must be >= 0";
     this.type = type;
     this.term = term;
     this.field = field;
-    this.value = value;
+    this.docIDUpto = docIDUpto;
   }

   abstract long valueSizeInBytes();

@@ -65,38 +71,102 @@ abstract class DocValuesUpdate {
     sizeInBytes += valueSizeInBytes();
     return sizeInBytes;
   }

+  protected abstract String valueToString();
+
+  abstract void writeTo(DataOutput output) throws IOException;
+
   @Override
   public String toString() {
-    return "term=" + term + ",field=" + field + ",value=" + value + ",docIDUpto=" + docIDUpto;
+    return "term=" + term + ",field=" + field + ",value=" + valueToString() + ",docIDUpto=" + docIDUpto;
   }

   /** An in-place update to a binary DocValues field */
   static final class BinaryDocValuesUpdate extends DocValuesUpdate {
+    private final BytesRef value;
+
     /* Size of BytesRef: 2*INT + ARRAY_HEADER + PTR */
     private static final long RAW_VALUE_SIZE_IN_BYTES = NUM_BYTES_ARRAY_HEADER + 2*Integer.BYTES + NUM_BYTES_OBJECT_REF;

     BinaryDocValuesUpdate(Term term, String field, BytesRef value) {
-      super(DocValuesType.BINARY, term, field, value);
+      this(term, field, value, BufferedUpdates.MAX_INT);
+    }
+
+    private BinaryDocValuesUpdate(Term term, String field, BytesRef value, int docIDUpTo) {
+      super(DocValuesType.BINARY, term, field, docIDUpTo);
+      this.value = value;
+    }
+
+    BinaryDocValuesUpdate prepareForApply(int docIDUpto) {
+      if (docIDUpto == this.docIDUpto) {
+        return this; // it's a final value so we can safely reuse this instance
+      }
+      return new BinaryDocValuesUpdate(term, field, value, docIDUpto);
     }

     @Override
     long valueSizeInBytes() {
-      return RAW_VALUE_SIZE_IN_BYTES + ((BytesRef) value).bytes.length;
+      return RAW_VALUE_SIZE_IN_BYTES + value.bytes.length;
+    }
+
+    @Override
+    protected String valueToString() {
+      return value.toString();
+    }
+
+    @Override
+    void writeTo(DataOutput out) throws IOException {
+      out.writeVInt(value.length);
+      out.writeBytes(value.bytes, value.offset, value.length);
+    }
+
+    static BytesRef readFrom(DataInput in, BytesRef scratch) throws IOException {
+      scratch.length = in.readVInt();
+      if (scratch.bytes.length < scratch.length) {
+        scratch.bytes = ArrayUtil.grow(scratch.bytes, scratch.length);
+      }
+      in.readBytes(scratch.bytes, 0, scratch.length);
+      return scratch;
     }
   }

   /** An in-place update to a numeric DocValues field */
   static final class NumericDocValuesUpdate extends DocValuesUpdate {
+    private final long value;
+
-    NumericDocValuesUpdate(Term term, String field, Long value) {
-      super(DocValuesType.NUMERIC, term, field, value);
+    NumericDocValuesUpdate(Term term, String field, long value) {
+      this(term, field, value, BufferedUpdates.MAX_INT);
+    }
+
+    private NumericDocValuesUpdate(Term term, String field, long value, int docIDUpTo) {
+      super(DocValuesType.NUMERIC, term, field, docIDUpTo);
+      this.value = value;
+    }
+
+    NumericDocValuesUpdate prepareForApply(int docIDUpto) {
+      if (docIDUpto == this.docIDUpto) {
+        return this;
+      }
+      return new NumericDocValuesUpdate(term, field, value, docIDUpto);
     }

     @Override
     long valueSizeInBytes() {
       return Long.BYTES;
     }
+
+    @Override
+    protected String valueToString() {
+      return Long.toString(value);
+    }
+
+    @Override
+    void writeTo(DataOutput out) throws IOException {
+      out.writeZLong(value);
+    }
+
+    static long readFrom(DataInput in) throws IOException {
+      return in.readZLong();
+    }
   }
 }

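The prepareForApply(int docIDUpto) methods introduced above make each DocValuesUpdate effectively immutable: the existing instance is reused when the requested slice limit already matches its docIDUpto, and a copy is allocated only when the limit differs. A minimal usage sketch, assuming package-private access from org.apache.lucene.index (the real call site is BufferedUpdates.addDocValuesUpdate, shown earlier):

    NumericDocValuesUpdate update = new NumericDocValuesUpdate(new Term("id", "1"), "price", 42L);
    // Same slice limit (defaults to BufferedUpdates.MAX_INT): the same instance comes back, no allocation.
    DocValuesUpdate same = update.prepareForApply(update.docIDUpto);
    // Different slice limit: a copy carrying docIDUpto == 17 is created instead of mutating the shared update.
    DocValuesUpdate sliced = update.prepareForApply(17);
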
DocumentsWriterDeleteQueue.java

@@ -25,7 +25,6 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.InfoStream;

 /**

@@ -412,10 +411,10 @@ final class DocumentsWriterDeleteQueue implements Accountable {
       for (DocValuesUpdate update : item) {
         switch (update.type) {
           case NUMERIC:
-            bufferedUpdates.addNumericUpdate(new NumericDocValuesUpdate(update.term, update.field, (Long) update.value), docIDUpto);
+            bufferedUpdates.addNumericUpdate((NumericDocValuesUpdate) update, docIDUpto);
             break;
           case BINARY:
-            bufferedUpdates.addBinaryUpdate(new BinaryDocValuesUpdate(update.term, update.field, (BytesRef) update.value), docIDUpto);
+            bufferedUpdates.addBinaryUpdate((BinaryDocValuesUpdate) update, docIDUpto);
             break;
           default:
             throw new IllegalArgumentException(update.type + " DocValues updates not supported yet!");

@@ -436,7 +435,7 @@ final class DocumentsWriterDeleteQueue implements Accountable {
       if (item.length > 0) {
         sb.append("term=").append(item[0].term).append("; updates: [");
         for (DocValuesUpdate update : item) {
-          sb.append(update.field).append(':').append(update.value).append(',');
+          sb.append(update.field).append(':').append(update.valueToString()).append(',');
         }
         sb.setCharAt(sb.length()-1, ']');
       }

FrozenBufferedUpdates.java

@@ -30,6 +30,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.IntConsumer;

 import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;

@@ -44,6 +45,7 @@ import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.RamUsageEstimator;

@@ -76,8 +78,8 @@ final class FrozenBufferedUpdates {
   // binary DV update term and their updates
   final byte[] binaryDVUpdates;

-  private int numericDVUpdateCount;
-  private int binaryDVUpdateCount;
+  private final int numericDVUpdateCount;
+  private final int binaryDVUpdateCount;

   /** Counts down once all deletes/updates have been applied */
   public final CountDownLatch applied = new CountDownLatch(1);

@@ -116,19 +118,22 @@ final class FrozenBufferedUpdates {
       deleteQueryLimits[upto] = ent.getValue();
       upto++;
     }
+    Counter counter = Counter.newCounter();
     // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
     // so that it maps to all fields it affects, sorted by their docUpto, and traverse
     // that Term only once, applying the update to all fields that still need to be
     // updated.
-    numericDVUpdates = freezeNumericDVUpdates(updates.numericUpdates);
+    numericDVUpdates = freezeDVUpdates(updates.numericUpdates, counter::addAndGet);
+    numericDVUpdateCount = (int)counter.get();
+    counter.addAndGet(-counter.get());
+    assert counter.get() == 0;
     // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
     // so that it maps to all fields it affects, sorted by their docUpto, and traverse
     // that Term only once, applying the update to all fields that still need to be
     // updated.
-    binaryDVUpdates = freezeBinaryDVUpdates(updates.binaryUpdates);
+    binaryDVUpdates = freezeDVUpdates(updates.binaryUpdates, counter::addAndGet);
+    binaryDVUpdateCount = (int)counter.get();

     bytesUsed = (int) (deleteTerms.ramBytesUsed() + deleteQueries.length * BYTES_PER_DEL_QUERY
         + numericDVUpdates.length + binaryDVUpdates.length);

@@ -141,60 +146,17 @@ final class FrozenBufferedUpdates {
     }
   }

-  private byte[] freezeNumericDVUpdates(Map<String,LinkedHashMap<Term,NumericDocValuesUpdate>> numericDVUpdates)
+  private static <T extends DocValuesUpdate> byte[] freezeDVUpdates(Map<String,LinkedHashMap<Term, T>> dvUpdates,
+                                                                    IntConsumer updateSizeConsumer)
     throws IOException {
     // TODO: we could do better here, e.g. collate the updates by field
     // so if you are updating 2 fields interleaved we don't keep writing the field strings
     try (RAMOutputStream out = new RAMOutputStream()) {
       String lastTermField = null;
       String lastUpdateField = null;
-      for (LinkedHashMap<Term, NumericDocValuesUpdate> numericUpdates : numericDVUpdates.values()) {
-        numericDVUpdateCount += numericUpdates.size();
-        for (NumericDocValuesUpdate update : numericUpdates.values()) {
-
-          int code = update.term.bytes().length << 2;
-
-          String termField = update.term.field();
-          if (termField.equals(lastTermField) == false) {
-            code |= 1;
-          }
-          String updateField = update.field;
-          if (updateField.equals(lastUpdateField) == false) {
-            code |= 2;
-          }
-          out.writeVInt(code);
-          out.writeVInt(update.docIDUpto);
-          if ((code & 1) != 0) {
-            out.writeString(termField);
-            lastTermField = termField;
-          }
-          if ((code & 2) != 0) {
-            out.writeString(updateField);
-            lastUpdateField = updateField;
-          }
-
-          out.writeBytes(update.term.bytes().bytes, update.term.bytes().offset, update.term.bytes().length);
-          out.writeZLong(((Long) update.value).longValue());
-        }
-      }
-      byte[] bytes = new byte[(int) out.getFilePointer()];
-      out.writeTo(bytes, 0);
-      return bytes;
-    }
-  }
-
-  private byte[] freezeBinaryDVUpdates(Map<String,LinkedHashMap<Term,BinaryDocValuesUpdate>> binaryDVUpdates)
-    throws IOException {
-    // TODO: we could do better here, e.g. collate the updates by field
-    // so if you are updating 2 fields interleaved we don't keep writing the field strings
-
-    try (RAMOutputStream out = new RAMOutputStream()) {
-      String lastTermField = null;
-      String lastUpdateField = null;
-      for (LinkedHashMap<Term, BinaryDocValuesUpdate> binaryUpdates : binaryDVUpdates.values()) {
-        binaryDVUpdateCount += binaryUpdates.size();
-        for (BinaryDocValuesUpdate update : binaryUpdates.values()) {
+      for (LinkedHashMap<Term, T> updates : dvUpdates.values()) {
+        updateSizeConsumer.accept(updates.size());
+        for (T update : updates.values()) {

           int code = update.term.bytes().length << 2;

           String termField = update.term.field();

@@ -216,10 +178,7 @@ final class FrozenBufferedUpdates {
             lastUpdateField = updateField;
           }
           out.writeBytes(update.term.bytes().bytes, update.term.bytes().offset, update.term.bytes().length);
-          BytesRef value = (BytesRef) update.value;
-          out.writeVInt(value.length);
-          out.writeBytes(value.bytes, value.offset, value.length);
+          update.writeTo(out);
         }
       }
       byte[] bytes = new byte[(int) out.getFilePointer()];

@@ -521,13 +480,13 @@
         // because we will run on the newly merged segment next:
         continue;
       }
+      final boolean isSegmentPrivateDeletes = privateSegment != null;
       if (numericDVUpdates.length > 0) {
-        updateCount += applyDocValuesUpdates(segState, numericDVUpdates, true);
+        updateCount += applyDocValuesUpdates(segState, numericDVUpdates, true, delGen, isSegmentPrivateDeletes);
       }

       if (binaryDVUpdates.length > 0) {
-        updateCount += applyDocValuesUpdates(segState, binaryDVUpdates, false);
+        updateCount += applyDocValuesUpdates(segState, binaryDVUpdates, false, delGen, isSegmentPrivateDeletes);
       }
     }

@@ -544,8 +503,9 @@
     return updateCount;
   }

-  private long applyDocValuesUpdates(BufferedUpdatesStream.SegmentState segState,
-                                     byte[] updates, boolean isNumeric) throws IOException {
+  private static long applyDocValuesUpdates(BufferedUpdatesStream.SegmentState segState, byte[] updates,
+                                            boolean isNumeric, long delGen,
+                                            boolean segmentPrivateDeletes) throws IOException {

     TermsEnum termsEnum = null;
     PostingsEnum postingsEnum = null;

@@ -592,9 +552,9 @@
       }
       in.readBytes(term.bytes, 0, term.length);

-      int limit;
+      final int limit;
       if (delGen == segState.delGen) {
-        assert privateSegment != null;
+        assert segmentPrivateDeletes;
         limit = docIDUpto;
       } else {
         limit = Integer.MAX_VALUE;

@@ -622,17 +582,14 @@
         }
       }

-      // TODO: can we avoid boxing here w/o fully forking this method?
-      Object value;
+      final BytesRef binaryValue;
+      final long longValue;
       if (isNumeric) {
-        value = Long.valueOf(in.readZLong());
+        longValue = NumericDocValuesUpdate.readFrom(in);
+        binaryValue = null;
       } else {
-        value = scratch;
-        scratch.length = in.readVInt();
-        if (scratch.bytes.length < scratch.length) {
-          scratch.bytes = ArrayUtil.grow(scratch.bytes, scratch.length);
-        }
-        in.readBytes(scratch.bytes, 0, scratch.length);
+        longValue = -1;
+        binaryValue = BinaryDocValuesUpdate.readFrom(in, scratch);
       }

       if (termsEnum == null) {

@@ -641,10 +598,8 @@
       }

       if (termsEnum.seekExact(term)) {
-
         // we don't need term frequencies for this
         postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
-
         DocValuesFieldUpdates dvUpdates = holder.get(updateField);
         if (dvUpdates == null) {
           if (isNumeric) {

@@ -652,38 +607,38 @@
           } else {
             dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
           }

           holder.put(updateField, dvUpdates);
         }
-        if (segState.rld.sortMap != null && privateSegment != null) {
+        final IntConsumer docIdConsumer;
+        final DocValuesFieldUpdates update = dvUpdates;
+        if (isNumeric) {
+          docIdConsumer = doc -> update.add(doc, longValue);
+        } else {
+          docIdConsumer = doc -> update.add(doc, binaryValue);
+        }
+        final Bits acceptDocs = segState.rld.getLiveDocs();
+        if (segState.rld.sortMap != null && segmentPrivateDeletes) {
           // This segment was sorted on flush; we must apply seg-private deletes carefully in this case:
           int doc;
-          final Bits acceptDocs = segState.rld.getLiveDocs();
           while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-            if (acceptDocs != null && acceptDocs.get(doc) == false) {
-              continue;
-            }
-            // The limit is in the pre-sorted doc space:
-            if (segState.rld.sortMap.newToOld(doc) < limit) {
-              dvUpdates.add(doc, value);
-              updateCount++;
+            if (acceptDocs == null || acceptDocs.get(doc)) {
+              // The limit is in the pre-sorted doc space:
+              if (segState.rld.sortMap.newToOld(doc) < limit) {
+                docIdConsumer.accept(doc);
+                updateCount++;
+              }
             }
           }
         } else {
           int doc;
-          final Bits acceptDocs = segState.rld.getLiveDocs();
           while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
             if (doc >= limit) {
               break; // no more docs that can be updated for this term
             }
-            if (acceptDocs != null && acceptDocs.get(doc) == false) {
-              continue;
-            }
-            dvUpdates.add(doc, value);
-            updateCount++;
+            if (acceptDocs == null || acceptDocs.get(doc)) {
+              docIdConsumer.accept(doc);
+              updateCount++;
+            }
           }
         }
       }

@@ -888,7 +843,7 @@
       }
     }
     if (deleteQueries.length != 0) {
-      s += " numDeleteQuerys=" + deleteQueries.length;
+      s += " numDeleteQueries=" + deleteQueries.length;
     }
     if (numericDVUpdates.length > 0) {
       s += " numNumericDVUpdates=" + numericDVUpdateCount;

IndexWriter.java

@@ -3699,7 +3699,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
             int mappedDoc = segDocMap.get(segLeafDocMap.get(doc));
             if (mappedDoc != -1) {
               // not deleted
-              mappedUpdates.add(mappedDoc, it.value());
+              mappedUpdates.add(mappedDoc, it);
               anyDVUpdates = true;
             }
           }

NumericDocValuesFieldUpdates.java

@@ -18,6 +18,7 @@ package org.apache.lucene.index;

 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.PackedInts;

@@ -31,7 +32,7 @@ import org.apache.lucene.util.packed.PagedMutable;
  *
  * @lucene.experimental
  */
-class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
+final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {

   // TODO: can't this just be NumericDocValues now? avoid boxing the long value...
   final static class Iterator extends DocValuesFieldUpdates.Iterator {

@@ -40,7 +41,7 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
     private final PagedMutable docs;
     private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE
     private int doc = -1;
-    private Long value = null;
+    private long value;
     private final long delGen;

     Iterator(int size, PagedGrowableWriter values, PagedMutable docs, long delGen) {

@@ -50,15 +51,20 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
       this.delGen = delGen;
     }

     @Override
-    Long value() {
+    long longValue() {
       return value;
     }

+    @Override
+    BytesRef binaryValue() {
+      throw new UnsupportedOperationException();
+    }
+
     @Override
     public int nextDoc() {
       if (idx >= size) {
-        value = null;
         return doc = DocIdSetIterator.NO_MORE_DOCS;
       }
       doc = (int) docs.get(idx);

@@ -68,7 +74,7 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
         ++idx;
       }
       // idx points to the "next" element
-      value = Long.valueOf(values.get(idx - 1));
+      value = values.get(idx - 1);
       return doc;
     }

@@ -101,28 +107,36 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
   }

   @Override
-  public synchronized void add(int doc, Object value) {
+  void add(int doc, BytesRef value) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  void add(int docId, DocValuesFieldUpdates.Iterator iterator) {
+    add(docId, iterator.longValue());
+  }
+
+  synchronized void add(int doc, long value) {
     if (finished) {
       throw new IllegalStateException("already finished");
     }

     assert doc < maxDoc;

     // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
     if (size == Integer.MAX_VALUE) {
       throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries");
     }

-    Long val = (Long) value;
-
     // grow the structures to have room for more elements
     if (docs.size() == size) {
       docs = docs.grow(size + 1);
       values = values.grow(size + 1);
     }

     docs.set(size, doc);
-    values.set(size, val.longValue());
+    values.set(size, value);
     ++size;
   }

@@ -156,13 +170,13 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
         // increasing docID order:
         // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
         // stable and preserving original order so the last update to that docID wins
-        return Integer.compare((int) docs.get(i), (int) docs.get(j));
+        return Long.compare(docs.get(i), docs.get(j));
       }
     }.sort(0, size);
   }

   @Override
-  public Iterator iterator() {
+  Iterator iterator() {
     if (finished == false) {
       throw new IllegalStateException("call finish first");
     }

@@ -170,7 +184,7 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
   }

   @Override
-  public boolean any() {
+  boolean any() {
     return size > 0;
   }

ReadersAndUpdates.java

@@ -146,12 +146,7 @@ final class ReadersAndUpdates {
     if (update.getFinished() == false) {
       throw new IllegalArgumentException("call finish first");
     }
-    List<DocValuesFieldUpdates> fieldUpdates = pendingDVUpdates.get(update.field);
-    if (fieldUpdates == null) {
-      fieldUpdates = new ArrayList<>();
-      pendingDVUpdates.put(update.field, fieldUpdates);
-    }
-
+    List<DocValuesFieldUpdates> fieldUpdates = pendingDVUpdates.computeIfAbsent(update.field, key -> new ArrayList<>());
     assert assertNoDupGen(fieldUpdates, update);

     ramBytesUsed.addAndGet(update.ramBytesUsed());

TestDocValuesFieldUpdates.java (new file)

@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestDocValuesFieldUpdates extends LuceneTestCase {
+
+  public void testMergeIterator() {
+    NumericDocValuesFieldUpdates updates1 = new NumericDocValuesFieldUpdates(0, "test", 6);
+    NumericDocValuesFieldUpdates updates2 = new NumericDocValuesFieldUpdates(1, "test", 6);
+    NumericDocValuesFieldUpdates updates3 = new NumericDocValuesFieldUpdates(2, "test", 6);
+    NumericDocValuesFieldUpdates updates4 = new NumericDocValuesFieldUpdates(2, "test", 6);
+
+    updates1.add(0, 1);
+    updates1.add(4, 0);
+    updates1.add(1, 4);
+    updates1.add(2, 5);
+    updates1.add(4, 9);
+    assertTrue(updates1.any());
+
+    updates2.add(0, 18);
+    updates2.add(1, 7);
+    updates2.add(2, 19);
+    updates2.add(5, 24);
+    assertTrue(updates2.any());
+
+    updates3.add(2, 42);
+    assertTrue(updates3.any());
+    assertFalse(updates4.any());
+    updates1.finish();
+    updates2.finish();
+    updates3.finish();
+    updates4.finish();
+    List<DocValuesFieldUpdates.Iterator> iterators = Arrays.asList(updates1.iterator(), updates2.iterator(),
+        updates3.iterator(), updates4.iterator());
+    Collections.shuffle(iterators, random());
+    DocValuesFieldUpdates.Iterator iterator = DocValuesFieldUpdates
+        .mergedIterator(iterators.toArray(new DocValuesFieldUpdates.Iterator[0]));
+    assertEquals(0, iterator.nextDoc());
+    assertEquals(18, iterator.longValue());
+    assertEquals(1, iterator.nextDoc());
+    assertEquals(7, iterator.longValue());
+    assertEquals(2, iterator.nextDoc());
+    assertEquals(42, iterator.longValue());
+    assertEquals(4, iterator.nextDoc());
+    assertEquals(9, iterator.longValue());
+    assertEquals(5, iterator.nextDoc());
+    assertEquals(24, iterator.longValue());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.nextDoc());
+  }
+}

TestPendingSoftDeletes.java

@@ -32,6 +32,7 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;

@@ -201,7 +202,18 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
   private DocValuesFieldUpdates singleUpdate(List<Integer> docsDeleted, int maxDoc) {
     return new DocValuesFieldUpdates(maxDoc, 0, "_soft_deletes", DocValuesType.NUMERIC) {
       @Override
-      public void add(int doc, Object value) {
+      public void add(int doc, long value) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void add(int doc, BytesRef value) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void add(int docId, Iterator iterator) {
+        throw new UnsupportedOperationException();
       }

       @Override

@@ -216,13 +228,18 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
       }

       @Override
-      public int docID() {
-        return doc;
+      long longValue() {
+        return 1;
       }

       @Override
-      Object value() {
-        return 1;
+      BytesRef binaryValue() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int docID() {
+        return doc;
       }

       @Override