LUCENE-8289: Share logic between Numeric and Binary DocValuesFieldUpdates

NumericDocValuesFieldUpdates and BinaryDocValuesFieldUpdates duplicate
a significant amount of logic that can all be pushed into the base class.
This change moves all the logic that is independent of the type to the base
class.
This commit is contained in:
Simon Willnauer 2018-05-02 11:55:32 +02:00
parent df713fc700
commit 82e7cb2322
4 changed files with 204 additions and 261 deletions

View File

@@ -18,10 +18,8 @@ package org.apache.lucene.index;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;
@@ -35,25 +33,18 @@ import org.apache.lucene.util.packed.PagedMutable;
*/
final class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
final static class Iterator extends DocValuesFieldUpdates.Iterator {
private final int size;
final static class Iterator extends DocValuesFieldUpdates.AbstractIterator {
private final PagedGrowableWriter offsets;
private final PagedGrowableWriter lengths;
private final PagedMutable docs;
private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE
private int doc = -1;
private final BytesRef value;
private int offset, length;
private final long delGen;
Iterator(int size, PagedGrowableWriter offsets, PagedGrowableWriter lengths,
PagedMutable docs, BytesRef values, long delGen) {
super(size, docs, delGen);
this.offsets = offsets;
this.size = size;
this.lengths = lengths;
this.docs = docs;
value = values.clone();
this.delGen = delGen;
}
@Override
@@ -64,35 +55,9 @@ final class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
}
@Override
public int nextDoc() {
if (idx >= size) {
offset = -1;
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
doc = (int) docs.get(idx);
++idx;
while (idx < size && docs.get(idx) == doc) {
// scan forward to last update to this doc
++idx;
}
// idx points to the "next" element
long prevIdx = idx - 1;
// cannot change 'value' here because nextDoc is called before the
// value is used, and it's a waste to clone the BytesRef when we
// obtain the value
offset = (int) offsets.get(prevIdx);
length = (int) lengths.get(prevIdx);
return doc;
}
@Override
public int docID() {
return doc;
}
@Override
long delGen() {
return delGen;
// Loads the binary value's slice coordinates for the update stored at
// slot {@code idx}: the iterator exposes a view into the shared bytes
// buffer rather than copying, so only offset/length are updated here.
protected void set(long idx) {
offset = (int) offsets.get(idx);
length = (int) lengths.get(idx);
}
@Override
@@ -101,26 +66,16 @@ final class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
}
}
private PagedMutable docs;
private PagedGrowableWriter offsets, lengths;
private BytesRefBuilder values;
private int size;
private final int bitsPerValue;
public BinaryDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
super(maxDoc, delGen, field, DocValuesType.BINARY);
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
values = new BytesRefBuilder();
}
@Override
public int size() {
return size;
}
@Override
public void add(int doc, long value) {
throw new UnsupportedOperationException();
@@ -133,92 +88,53 @@ final class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
@Override
synchronized public void add(int doc, BytesRef value) {
if (finished) {
throw new IllegalStateException("already finished");
}
assert doc < maxDoc: "doc=" + doc + " maxDoc=" + maxDoc;
// TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
if (size == Integer.MAX_VALUE) {
throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries");
}
// grow the structures to have room for more elements
if (docs.size() == size) {
docs = docs.grow(size + 1);
offsets = offsets.grow(size + 1);
lengths = lengths.grow(size + 1);
}
docs.set(size, doc);
offsets.set(size, values.length());
lengths.set(size, value.length);
int index = add(doc);
offsets.set(index, values.length());
lengths.set(index, value.length);
values.append(value);
++size;
}
@Override
public void finish() {
if (finished) {
throw new IllegalStateException("already finished");
}
finished = true;
protected void swap(int i, int j) {
super.swap(i, j);
// shrink wrap
if (size < docs.size()) {
docs = docs.resize(size);
offsets = offsets.resize(size);
lengths = lengths.resize(size);
}
long tmpOffset = offsets.get(j);
offsets.set(j, offsets.get(i));
offsets.set(i, tmpOffset);
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
long tmpDoc = docs.get(j);
docs.set(j, docs.get(i));
docs.set(i, tmpDoc);
long tmpOffset = offsets.get(j);
offsets.set(j, offsets.get(i));
offsets.set(i, tmpOffset);
long tmpLength = lengths.get(j);
lengths.set(j, lengths.get(i));
lengths.set(i, tmpLength);
}
long tmpLength = lengths.get(j);
lengths.set(j, lengths.get(i));
lengths.set(i, tmpLength);
}
@Override
protected int compare(int i, int j) {
// increasing docID order:
// NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
// stable and preserving original order so the last update to that docID wins
return Integer.compare((int) docs.get(i), (int) docs.get(j));
}
}.sort(0, size);
@Override
// Grows the per-update offset/length arrays in lock-step with the base
// class's docs array so all three always share the same capacity.
protected void grow(int size) {
super.grow(size);
offsets = offsets.grow(size);
lengths = lengths.grow(size);
}
@Override
// Shrink-wraps the offset/length arrays alongside the base class's docs
// array (called from finish() once no more updates will be added).
protected void resize(int size) {
super.resize(size);
offsets = offsets.resize(size);
lengths = lengths.resize(size);
}
@Override
public Iterator iterator() {
if (finished == false) {
throw new IllegalStateException("call finish first");
}
ensureFinished();
return new Iterator(size, offsets, lengths, docs, values.get(), delGen);
}
@Override
public boolean any() {
return size > 0;
}
@Override
public long ramBytesUsed() {
return offsets.ramBytesUsed()
return super.ramBytesUsed()
+ offsets.ramBytesUsed()
+ lengths.ramBytesUsed()
+ docs.ramBytesUsed()
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 4 * Integer.BYTES
+ 5 * RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ 2 * Integer.BYTES
+ 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ values.bytes().length;
}
}

View File

@@ -16,9 +16,14 @@
*/
package org.apache.lucene.index;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedMutable;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -218,9 +223,12 @@ abstract class DocValuesFieldUpdates implements Accountable {
final String field;
final DocValuesType type;
final long delGen;
protected boolean finished;
private final int bitsPerValue;
private boolean finished;
protected final int maxDoc;
protected PagedMutable docs;
protected int size;
protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
this.maxDoc = maxDoc;
this.delGen = delGen;
@@ -229,9 +237,11 @@ abstract class DocValuesFieldUpdates implements Accountable {
throw new NullPointerException("DocValuesType must not be null");
}
this.type = type;
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
}
boolean getFinished() {
final boolean getFinished() {
return finished;
}
@@ -254,11 +264,132 @@ abstract class DocValuesFieldUpdates implements Accountable {
abstract Iterator iterator();
/** Freezes internal data structures and sorts updates by docID for efficient iteration. */
abstract void finish();
/**
* Freezes this instance: shrink-wraps the internal structures to {@code size}
* and sorts all updates by docID so they can be iterated efficiently.
* Subclasses participate via the {@link #swap} / {@link #resize} hooks.
* May only be called once; subsequent calls throw {@link IllegalStateException}.
*/
final synchronized void finish() {
if (finished) {
throw new IllegalStateException("already finished");
}
finished = true;
// shrink wrap
if (size < docs.size()) {
resize(size);
}
// Sort in place; swap() is overridden by subclasses so their parallel
// value arrays stay aligned with the docs array.
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
DocValuesFieldUpdates.this.swap(i, j);
}
@Override
protected int compare(int i, int j) {
// increasing docID order:
// NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
// stable and preserving original order so the last update to that docID wins
return Long.compare(docs.get(i), docs.get(j));
}
}.sort(0, size);
}
/** Returns true if this instance contains any updates. */
abstract boolean any();
abstract int size();
// Returns true if at least one update has been recorded.
synchronized final boolean any() {
return size > 0;
}
// Number of doc/value entries recorded so far (including duplicates for
// the same docID; ties are resolved at iteration time).
synchronized final int size() {
return size;
}
/**
* Records an update for {@code doc} and returns the slot index at which the
* subclass must store the corresponding value (its parallel arrays are kept
* the same size as {@code docs} via the {@link #grow} hook).
*
* @throws IllegalStateException if {@link #finish} was already called, or if
*         Integer.MAX_VALUE entries have been added
*/
final synchronized int add(int doc) {
if (finished) {
throw new IllegalStateException("already finished");
}
assert doc < maxDoc;
// TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
if (size == Integer.MAX_VALUE) {
throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries");
}
// grow the structures to have room for more elements
if (docs.size() == size) {
grow(size+1);
}
docs.set(size, doc);
++size;
// the slot just written; subclasses store their value at this index
return size-1;
}
// Swaps entries i and j of the docs array; subclasses override (calling
// super) to keep their parallel value arrays in sync during sorting.
protected void swap(int i, int j) {
long tmpDoc = docs.get(j);
docs.set(j, docs.get(i));
docs.set(i, tmpDoc);
}
// Grows the docs array to hold at least {@code size} entries; subclasses
// override (calling super) to grow their parallel arrays too.
protected void grow(int size) {
docs = docs.grow(size);
}
// Shrink-wraps the docs array to exactly {@code size} entries; subclasses
// override (calling super) to resize their parallel arrays too.
protected void resize(int size) {
docs = docs.resize(size);
}
// Guard used by iterator() implementations: iteration is only valid after
// finish() has sorted the updates.
protected final void ensureFinished() {
if (finished == false) {
throw new IllegalStateException("call finish first");
}
}
@Override
// Base-class RAM estimate: the docs array plus this object's header,
// primitive fields and one object reference. Subclasses add their own
// structures on top via super.ramBytesUsed().
public long ramBytesUsed() {
return docs.ramBytesUsed()
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 2 * Integer.BYTES
// NOTE(review): "2 + Long.BYTES" adds 2 bytes then Long.BYTES — looks
// like it may have been intended as "2 * Long.BYTES"; confirm intent
// (it's only an estimate, so behavior impact is minor either way).
+ 2 + Long.BYTES
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF;
}
// TODO: can't this just be NumericDocValues now? avoid boxing the long value...
/**
* Shared iterator skeleton over the sorted (docID-ascending) updates.
* Walks the docs array, collapsing consecutive entries with the same docID
* so that the LAST update to a given doc wins; subclasses implement
* {@link #set(long)} to materialize the value stored at the chosen slot.
*/
protected abstract static class AbstractIterator extends DocValuesFieldUpdates.Iterator {
private final int size;
private final PagedMutable docs;
private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE
private int doc = -1; // current docID, or NO_MORE_DOCS when exhausted
private final long delGen;
AbstractIterator(int size, PagedMutable docs, long delGen) {
this.size = size;
this.docs = docs;
this.delGen = delGen;
}
@Override
public final int nextDoc() {
if (idx >= size) {
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
doc = (int) docs.get(idx);
++idx;
while (idx < size && docs.get(idx) == doc) {
// scan forward to last update to this doc
++idx;
}
// idx now points past the last entry for this doc; expose that entry
set(idx-1);
return doc;
}
/**
* Called when the iterator moved to the next document
* @param idx the internal index to set the value to
*/
protected abstract void set(long idx);
@Override
public final int docID() {
return doc;
}
@Override
final long delGen() {
return delGen;
}
}
}

View File

@@ -17,9 +17,7 @@
package org.apache.lucene.index;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;
@@ -35,23 +33,14 @@ import org.apache.lucene.util.packed.PagedMutable;
final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
// TODO: can't this just be NumericDocValues now? avoid boxing the long value...
final static class Iterator extends DocValuesFieldUpdates.Iterator {
private final int size;
final static class Iterator extends DocValuesFieldUpdates.AbstractIterator {
private final PagedGrowableWriter values;
private final PagedMutable docs;
private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE
private int doc = -1;
private long value;
private final long delGen;
Iterator(int size, PagedGrowableWriter values, PagedMutable docs, long delGen) {
this.size = size;
this.values = values;
this.docs = docs;
this.delGen = delGen;
}
Iterator(int size, PagedGrowableWriter values, PagedMutable docs, long delGen) {
super(size, docs, delGen);
this.values = values;
}
@Override
long longValue() {
return value;
@@ -63,49 +52,16 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
}
@Override
public int nextDoc() {
if (idx >= size) {
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
doc = (int) docs.get(idx);
++idx;
while (idx < size && docs.get(idx) == doc) {
// scan forward to last update to this doc
++idx;
}
// idx points to the "next" element
value = values.get(idx - 1);
return doc;
}
@Override
public int docID() {
return doc;
}
@Override
long delGen() {
return delGen;
// Loads the numeric value of the update stored at slot {@code idx} into
// the iterator's current value.
protected void set(long idx) {
value = values.get(idx);
}
}
private final int bitsPerValue;
private PagedMutable docs;
private PagedGrowableWriter values;
private int size;
public NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
super(maxDoc, delGen, field, DocValuesType.NUMERIC);
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
}
@Override
public int size() {
return size;
}
@Override
void add(int doc, BytesRef value) {
throw new UnsupportedOperationException();
@@ -116,84 +72,43 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
add(docId, iterator.longValue());
}
@Override
synchronized void add(int doc, long value) {
if (finished) {
throw new IllegalStateException("already finished");
}
assert doc < maxDoc;
// TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
if (size == Integer.MAX_VALUE) {
throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries");
}
// grow the structures to have room for more elements
if (docs.size() == size) {
docs = docs.grow(size + 1);
values = values.grow(size + 1);
}
docs.set(size, doc);
values.set(size, value);
++size;
int add = add(doc);
values.set(add, value);
}
@Override
public void finish() {
if (finished) {
throw new IllegalStateException("already finished");
}
finished = true;
// Keeps the values array aligned with the docs array while the base
// class's finish() sorts the updates by docID.
protected void swap(int i, int j) {
super.swap(i, j);
long tmpVal = values.get(j);
values.set(j, values.get(i));
values.set(i, tmpVal);
}
// shrink wrap
if (size < docs.size()) {
docs = docs.resize(size);
values = values.resize(size);
}
@Override
// Grows the values array in lock-step with the base class's docs array.
protected void grow(int size) {
super.grow(size);
values = values.grow(size);
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
long tmpDoc = docs.get(j);
docs.set(j, docs.get(i));
docs.set(i, tmpDoc);
long tmpVal = values.get(j);
values.set(j, values.get(i));
values.set(i, tmpVal);
}
@Override
protected int compare(int i, int j) {
// increasing docID order:
// NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
// stable and preserving original order so the last update to that docID wins
return Long.compare(docs.get(i), docs.get(j));
}
}.sort(0, size);
@Override
// Shrink-wraps the values array alongside the base class's docs array.
protected void resize(int size) {
super.resize(size);
values = values.resize(size);
}
@Override
Iterator iterator() {
if (finished == false) {
throw new IllegalStateException("call finish first");
}
ensureFinished();
return new Iterator(size, values, docs, delGen);
}
@Override
boolean any() {
return size > 0;
}
@Override
public long ramBytesUsed() {
return values.ramBytesUsed()
+ docs.ramBytesUsed()
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 2 * Integer.BYTES
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+ super.ramBytesUsed()
+ Long.BYTES
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF;
}
}

View File

@@ -248,25 +248,6 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
}
};
}
@Override
public void finish() {
}
@Override
public boolean any() {
return true;
}
@Override
public long ramBytesUsed() {
return 0;
}
@Override
public int size() {
return 1;
}
};
}
}