mirror of https://github.com/apache/lucene.git
LUCENE-5113: Added (Monotonic)AppendingLongBuffer.freeze to pack the pending values.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1503578 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5f631cfa25
commit
b5db1adfc2
|
@ -77,6 +77,7 @@ public class MergeState {
|
|||
++del;
|
||||
}
|
||||
}
|
||||
docMap.freeze();
|
||||
final int numDeletedDocs = del;
|
||||
assert docMap.size() == maxDoc;
|
||||
return new DocMap() {
|
||||
|
|
|
@ -328,6 +328,11 @@ public class MultiDocValues {
|
|||
}
|
||||
globalOrd++;
|
||||
}
|
||||
subIndexes.freeze();
|
||||
globalOrdDeltas.freeze();
|
||||
for (int i = 0; i < ordDeltas.length; ++i) {
|
||||
ordDeltas[i].freeze();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1037,6 +1037,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
termOrd++;
|
||||
}
|
||||
}
|
||||
termOrdToBytesOffset.freeze();
|
||||
|
||||
// maybe an int-only impl?
|
||||
return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
|
||||
|
|
|
@ -37,7 +37,7 @@ abstract class AbstractAppendingLongBuffer {
|
|||
PackedInts.Reader[] deltas;
|
||||
private long deltasBytes;
|
||||
int valuesOff;
|
||||
final long[] pending;
|
||||
long[] pending;
|
||||
int pendingOff;
|
||||
|
||||
AbstractAppendingLongBuffer(int initialBlockCount, int pageSize) {
|
||||
|
@ -50,13 +50,27 @@ abstract class AbstractAppendingLongBuffer {
|
|||
pendingOff = 0;
|
||||
}
|
||||
|
||||
/** Returns the number of values held by one full page, computed as {@code pageMask + 1}. */
final int pageSize() {
|
||||
return pageMask + 1; // NOTE(review): presumably pageMask == pageSize - 1 for a power-of-two page size; the constructor is not visible here, confirm
|
||||
}
|
||||
|
||||
/** Get the number of values that have been added to the buffer. */
|
||||
public final long size() {
|
||||
return valuesOff * (long) pending.length + pendingOff;
|
||||
long size = pendingOff;
|
||||
if (valuesOff > 0) {
|
||||
size += deltas[valuesOff - 1].size();
|
||||
}
|
||||
if (valuesOff > 1) {
|
||||
size += (long) (valuesOff - 1) * pageSize();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/** Append a value to this buffer. */
|
||||
public final void add(long l) {
|
||||
if (pending == null) {
|
||||
throw new IllegalStateException("This buffer is frozen");
|
||||
}
|
||||
if (pendingOff == pending.length) {
|
||||
// check size
|
||||
if (deltas.length == valuesOff) {
|
||||
|
@ -64,9 +78,7 @@ abstract class AbstractAppendingLongBuffer {
|
|||
grow(newLength);
|
||||
}
|
||||
packPendingValues();
|
||||
if (deltas[valuesOff] != null) {
|
||||
deltasBytes += deltas[valuesOff].ramBytesUsed();
|
||||
}
|
||||
deltasBytes += deltas[valuesOff].ramBytesUsed();
|
||||
++valuesOff;
|
||||
// reset pending buffer
|
||||
pendingOff = 0;
|
||||
|
@ -99,13 +111,15 @@ abstract class AbstractAppendingLongBuffer {
|
|||
|
||||
long[] currentValues;
|
||||
int vOff, pOff;
|
||||
int currentCount; // number of entries of the current page
|
||||
|
||||
Iterator() {
|
||||
vOff = pOff = 0;
|
||||
if (valuesOff == 0) {
|
||||
currentValues = pending;
|
||||
currentCount = pendingOff;
|
||||
} else {
|
||||
currentValues = new long[pending.length];
|
||||
currentValues = new long[deltas[0].size()];
|
||||
fillValues();
|
||||
}
|
||||
}
|
||||
|
@ -114,18 +128,20 @@ abstract class AbstractAppendingLongBuffer {
|
|||
|
||||
/** Whether or not there are remaining values. */
|
||||
public final boolean hasNext() {
|
||||
return vOff < valuesOff || (vOff == valuesOff && pOff < pendingOff);
|
||||
return pOff < currentCount;
|
||||
}
|
||||
|
||||
/** Return the next long in the buffer. */
|
||||
public final long next() {
|
||||
assert hasNext();
|
||||
long result = currentValues[pOff++];
|
||||
if (pOff == pending.length) {
|
||||
if (pOff == currentCount) {
|
||||
vOff += 1;
|
||||
pOff = 0;
|
||||
if (vOff <= valuesOff) {
|
||||
fillValues();
|
||||
} else {
|
||||
currentCount = 0;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
@ -136,7 +152,9 @@ abstract class AbstractAppendingLongBuffer {
|
|||
long baseRamBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||
+ 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 3 arrays
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_INT; // the 2 offsets
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_INT // the 2 offsets
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG; // deltasBytes
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -145,13 +163,25 @@ abstract class AbstractAppendingLongBuffer {
|
|||
public long ramBytesUsed() {
|
||||
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
|
||||
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG // valuesBytes
|
||||
+ RamUsageEstimator.sizeOf(pending)
|
||||
+ (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
|
||||
+ RamUsageEstimator.sizeOf(minValues)
|
||||
+ RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * deltas.length); // values
|
||||
|
||||
return bytesUsed + deltasBytes;
|
||||
}
|
||||
|
||||
/**
 * Pack all pending values in this buffer. Subsequent calls to {@link #add(long)} will fail
 * with an {@link IllegalStateException}.
 * <p>
 * Calling this method again on an already frozen buffer packs nothing further: no values are
 * pending any more, so only the reference to the pending buffer is cleared again.
 */
|
||||
public void freeze() {
|
||||
if (pendingOff > 0) { // only build a last page when some values are still unpacked
|
||||
if (deltas.length == valuesOff) { // no free slot left for one more page
|
||||
grow(valuesOff + 1); // don't oversize! this is the last page, so ask for exactly one more slot
|
||||
}
|
||||
packPendingValues(); // subclass hook: packs the first pendingOff pending values into the page at slot valuesOff
|
||||
deltasBytes += deltas[valuesOff].ramBytesUsed(); // keep the running total reported by ramBytesUsed() up to date
|
||||
++valuesOff; // the page is now complete
|
||||
pendingOff = 0; // nothing is pending any more
|
||||
}
|
||||
pending = null; // marks the buffer as frozen: add(long) checks for null and throws
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.util.packed;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Utility class to buffer a list of signed longs in memory. This class only
|
||||
|
@ -52,8 +51,6 @@ public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
|
|||
|
||||
@Override
|
||||
void packPendingValues() {
|
||||
assert pendingOff == pending.length;
|
||||
|
||||
// compute max delta
|
||||
long minValue = pending[0];
|
||||
long maxValue = pending[0];
|
||||
|
@ -64,7 +61,9 @@ public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
|
|||
final long delta = maxValue - minValue;
|
||||
|
||||
minValues[valuesOff] = minValue;
|
||||
if (delta != 0) {
|
||||
if (delta == 0) {
|
||||
deltas[valuesOff] = new PackedInts.NullReader(pendingOff);
|
||||
} else {
|
||||
// build a new packed reader
|
||||
final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
|
||||
for (int i = 0; i < pendingOff; ++i) {
|
||||
|
@ -95,13 +94,13 @@ public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
|
|||
void fillValues() {
|
||||
if (vOff == valuesOff) {
|
||||
currentValues = pending;
|
||||
} else if (deltas[vOff] == null) {
|
||||
Arrays.fill(currentValues, minValues[vOff]);
|
||||
currentCount = pendingOff;
|
||||
} else {
|
||||
for (int k = 0; k < pending.length; ) {
|
||||
k += deltas[vOff].get(k, currentValues, k, pending.length - k);
|
||||
currentCount = deltas[vOff].size();
|
||||
for (int k = 0; k < currentCount; ) {
|
||||
k += deltas[vOff].get(k, currentValues, k, currentCount - k);
|
||||
}
|
||||
for (int k = 0; k < pending.length; ++k) {
|
||||
for (int k = 0; k < currentCount; ++k) {
|
||||
currentValues[k] += minValues[vOff];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
|
|||
* @param pageSize the size of a single page */
|
||||
public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize) {
|
||||
super(initialPageCount, pageSize);
|
||||
averages = new float[pending.length];
|
||||
averages = new float[pageSize];
|
||||
}
|
||||
|
||||
/** Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16
|
||||
|
@ -74,16 +74,15 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
|
|||
|
||||
@Override
|
||||
void packPendingValues() {
|
||||
assert pendingOff == pending.length;
|
||||
|
||||
assert pendingOff > 0;
|
||||
minValues[valuesOff] = pending[0];
|
||||
averages[valuesOff] = (float) (pending[pending.length - 1] - pending[0]) / (pending.length - 1);
|
||||
averages[valuesOff] = pendingOff == 1 ? 0 : (float) (pending[pendingOff - 1] - pending[0]) / (pendingOff - 1);
|
||||
|
||||
for (int i = 0; i < pending.length; ++i) {
|
||||
for (int i = 0; i < pendingOff; ++i) {
|
||||
pending[i] = zigZagEncode(pending[i] - minValues[valuesOff] - (long) (averages[valuesOff] * (long) i));
|
||||
}
|
||||
long maxDelta = 0;
|
||||
for (int i = 0; i < pending.length; ++i) {
|
||||
for (int i = 0; i < pendingOff; ++i) {
|
||||
if (pending[i] < 0) {
|
||||
maxDelta = -1;
|
||||
break;
|
||||
|
@ -91,7 +90,9 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
|
|||
maxDelta = Math.max(maxDelta, pending[i]);
|
||||
}
|
||||
}
|
||||
if (maxDelta != 0) {
|
||||
if (maxDelta == 0) {
|
||||
deltas[valuesOff] = new PackedInts.NullReader(pendingOff);
|
||||
} else {
|
||||
final int bitsRequired = maxDelta < 0 ? 64 : PackedInts.bitsRequired(maxDelta);
|
||||
final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, PackedInts.COMPACT);
|
||||
for (int i = 0; i < pendingOff; ) {
|
||||
|
@ -118,15 +119,13 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
|
|||
void fillValues() {
|
||||
if (vOff == valuesOff) {
|
||||
currentValues = pending;
|
||||
} else if (deltas[vOff] == null) {
|
||||
for (int k = 0; k < pending.length; ++k) {
|
||||
currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k);
|
||||
}
|
||||
currentCount = pendingOff;
|
||||
} else {
|
||||
for (int k = 0; k < pending.length; ) {
|
||||
k += deltas[vOff].get(k, currentValues, k, pending.length - k);
|
||||
currentCount = deltas[vOff].size();
|
||||
for (int k = 0; k < currentCount; ) {
|
||||
k += deltas[vOff].get(k, currentValues, k, currentCount - k);
|
||||
}
|
||||
for (int k = 0; k < pending.length; ++k) {
|
||||
for (int k = 0; k < currentCount; ++k) {
|
||||
currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k) + zigZagDecode(currentValues[k]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,12 +18,14 @@ package org.apache.lucene.util.packed;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Simplistic compression for array of unsigned long values.
|
||||
|
@ -703,7 +705,8 @@ public class PackedInts {
|
|||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
return 0;
|
||||
Arrays.fill(arr, off, off + len, 0);
|
||||
return len;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -718,7 +721,7 @@ public class PackedInts {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -958,6 +958,14 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
buf.add(arr[i]);
|
||||
}
|
||||
assertEquals(arr.length, buf.size());
|
||||
if (random().nextBoolean()) {
|
||||
buf.freeze();
|
||||
if (random().nextBoolean()) {
|
||||
// Make sure double freeze doesn't break anything
|
||||
buf.freeze();
|
||||
}
|
||||
}
|
||||
assertEquals(arr.length, buf.size());
|
||||
final AbstractAppendingLongBuffer.Iterator it = buf.iterator();
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -973,8 +981,7 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
|
||||
final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
|
||||
final long computedBytesUsed = buf.ramBytesUsed();
|
||||
assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,
|
||||
expectedBytesUsed, computedBytesUsed);
|
||||
assertEquals(expectedBytesUsed, computedBytesUsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -191,6 +191,7 @@ public abstract class Sorter {
|
|||
for (int i = 0; i < maxDoc; ++i) {
|
||||
newToOld.add(docs[i]);
|
||||
}
|
||||
newToOld.freeze();
|
||||
|
||||
for (int i = 0; i < maxDoc; ++i) {
|
||||
docs[(int) newToOld.get(i)] = i;
|
||||
|
@ -200,6 +201,7 @@ public abstract class Sorter {
|
|||
for (int i = 0; i < maxDoc; ++i) {
|
||||
oldToNew.add(docs[i]);
|
||||
}
|
||||
oldToNew.freeze();
|
||||
|
||||
return new Sorter.DocMap() {
|
||||
|
||||
|
|
|
@ -107,6 +107,7 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
}
|
||||
}
|
||||
}
|
||||
deletes.freeze();
|
||||
return deletes;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue