mirror of https://github.com/apache/lucene.git
LUCENE-5145: Added AppendingPackedLongBuffer and support for bulk get operations to the Appending*Buffers.
Introduced bulk retrieval to the AbstractAppendingLongBuffer classes for faster reads. Introduced a new variant, AppendingPackedLongBuffer, which relies solely on PackedInts as a backend. This new class is useful when storing non-negative numbers with a uniform distribution over a fixed (limited) range, e.g. facet ordinals. To distinguish it from AppendingPackedLongBuffer, the delta-based AppendingLongBuffer was renamed to AppendingDeltaPackedLongBuffer. Also fixed an issue where NullReader did not respect its valueCount in bulk gets. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1508423 13f79535-47bb-0310-9956-ffa450edef68
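For orientation, a minimal usage sketch of the API introduced by this change (not code from the commit; the example class name is made up, and it assumes the packed utilities are on the classpath):

import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;

public class AppendingPackedLongBufferExample {
  public static void main(String[] args) {
    // Non-negative values over a small fixed range, e.g. facet ordinals.
    AppendingPackedLongBuffer buf = new AppendingPackedLongBuffer(16, 1024, PackedInts.COMPACT);
    for (int i = 0; i < 10000; i++) {
      buf.add(i % 128); // only sequential writes are supported
    }

    // Single-value random read.
    long first = buf.get(0);

    // Bulk get: reads at least one and at most target.length values starting at
    // index 5000 and returns how many were actually read (it may stop at a page boundary).
    long[] target = new long[512];
    int read = buf.get(5000, target, 0, target.length);
    System.out.println("first=" + first + ", bulk read=" + read + " values");
  }
}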
parent 3bff59a588
commit 2c5856cf25
@@ -136,6 +136,13 @@ Optimizations
 * LUCENE-5119: DiskDV keeps the document-to-ordinal mapping on disk for
   SortedDocValues.  (Robert Muir)
 
+* LUCENE-5145: New AppendingPackedLongBuffer, a new variant of the former
+  AppendingLongBuffer which assumes values are 0-based.
+  (Boaz Leskes via Adrien Grand)
+
+* LUCENE-5145: All Appending*Buffer now support bulk get.
+  (Boaz Leskes via Adrien Grand)
+
 Documentation
 
 * LUCENE-4894: remove facet userguide as it was outdated. Partially absorbed into
@@ -26,7 +26,8 @@ import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 
@@ -36,14 +37,14 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 class BinaryDocValuesWriter extends DocValuesWriter {
 
   private final ByteBlockPool pool;
-  private final AppendingLongBuffer lengths;
+  private final AppendingDeltaPackedLongBuffer lengths;
   private final FieldInfo fieldInfo;
   private int addedValues = 0;
 
   public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
     this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
-    this.lengths = new AppendingLongBuffer();
+    this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
   }
 
   public void addValue(int docID, BytesRef value) {
@@ -90,7 +91,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   // iterates over the values we have in ram
   private class BytesIterator implements Iterator<BytesRef> {
     final BytesRef value = new BytesRef();
-    final AppendingLongBuffer.Iterator lengthsIterator = lengths.iterator();
+    final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
     final int size = (int) lengths.size();
     final int maxDoc;
     int upto;
@@ -23,8 +23,9 @@ import java.util.List;
 import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
 import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * A wrapper for CompositeIndexReader providing access to DocValues.
@@ -277,7 +278,7 @@ public class MultiDocValues {
     // globalOrd -> (globalOrd - segmentOrd)
     final MonotonicAppendingLongBuffer globalOrdDeltas;
     // globalOrd -> sub index
-    final AppendingLongBuffer subIndexes;
+    final AppendingPackedLongBuffer subIndexes;
     // segmentOrd -> (globalOrd - segmentOrd)
     final MonotonicAppendingLongBuffer ordDeltas[];
 
@@ -293,8 +294,8 @@ public class MultiDocValues {
       // create the ordinal mappings by pulling a termsenum over each sub's
       // unique terms, and walking a multitermsenum over those
       this.owner = owner;
-      globalOrdDeltas = new MonotonicAppendingLongBuffer();
-      subIndexes = new AppendingLongBuffer();
+      globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
+      subIndexes = new AppendingPackedLongBuffer(PackedInts.COMPACT);
       ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
       for (int i = 0; i < ordDeltas.length; i++) {
         ordDeltas[i] = new MonotonicAppendingLongBuffer();
@@ -23,7 +23,8 @@ import java.util.NoSuchElementException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 /** Buffers up pending long per doc, then flushes when
  *  segment flushes. */
@@ -31,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
 
   private final static long MISSING = 0L;
 
-  private AppendingLongBuffer pending;
+  private AppendingDeltaPackedLongBuffer pending;
   private final Counter iwBytesUsed;
   private long bytesUsed;
   private final FieldInfo fieldInfo;
 
   public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
-    pending = new AppendingLongBuffer();
+    pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     bytesUsed = pending.ramBytesUsed();
     this.fieldInfo = fieldInfo;
     this.iwBytesUsed = iwBytesUsed;
@@ -89,7 +90,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
 
   // iterates over the values we have in ram
   private class NumericIterator implements Iterator<Number> {
-    final AppendingLongBuffer.Iterator iter = pending.iterator();
+    final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
     final int size = (int)pending.size();
     final int maxDoc;
     int upto;
@@ -30,13 +30,14 @@ import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 /** Buffers up pending byte[] per doc, deref and sorting via
  *  int ord, then flushes when segment flushes. */
 class SortedDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
-  private AppendingLongBuffer pending;
+  private AppendingPackedLongBuffer pending;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this currently only tracks differences in 'pending'
   private final FieldInfo fieldInfo;
@@ -51,7 +52,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
             new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
             BytesRefHash.DEFAULT_CAPACITY,
             new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
-    pending = new AppendingLongBuffer();
+    pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
     bytesUsed = pending.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
@@ -176,7 +177,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
 
   // iterates over the ords for each doc we have in ram
   private class OrdsIterator implements Iterator<Number> {
-    final AppendingLongBuffer.Iterator iter = pending.iterator();
+    final AppendingPackedLongBuffer.Iterator iter = pending.iterator();
     final int ordMap[];
     final int maxDoc;
     int docUpto;
@@ -32,14 +32,16 @@ import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 /** Buffers up pending byte[]s per doc, deref and sorting via
  *  int ord, then flushes when segment flushes. */
 class SortedSetDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
-  private AppendingLongBuffer pending; // stream of all termIDs
-  private AppendingLongBuffer pendingCounts; // termIDs per doc
+  private AppendingPackedLongBuffer pending; // stream of all termIDs
+  private AppendingDeltaPackedLongBuffer pendingCounts; // termIDs per doc
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
@@ -56,8 +58,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
             new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
             BytesRefHash.DEFAULT_CAPACITY,
             new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
-    pending = new AppendingLongBuffer();
-    pendingCounts = new AppendingLongBuffer();
+    pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
+    pendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
@@ -224,8 +226,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
 
   // iterates over the ords for each doc we have in ram
   private class OrdsIterator implements Iterator<Number> {
-    final AppendingLongBuffer.Iterator iter = pending.iterator();
-    final AppendingLongBuffer.Iterator counts = pendingCounts.iterator();
+    final AppendingPackedLongBuffer.Iterator iter = pending.iterator();
+    final AppendingDeltaPackedLongBuffer.Iterator counts = pendingCounts.iterator();
     final int ordMap[];
     final long numOrds;
     long ordUpto;
@@ -273,7 +275,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
   }
 
   private class OrdCountIterator implements Iterator<Number> {
-    final AppendingLongBuffer.Iterator iter = pendingCounts.iterator();
+    final AppendingDeltaPackedLongBuffer.Iterator iter = pendingCounts.iterator();
     final int maxDoc;
     int docUpto;
 
@@ -27,6 +27,7 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * {@link DocIdSet} implementation based on word-aligned hybrid encoding on
@@ -330,9 +331,9 @@ public final class WAH8DocIdSet extends DocIdSet {
     } else {
       final int pageSize = 128;
       final int initialPageCount = (valueCount + pageSize - 1) / pageSize;
-      final MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
-      final MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
+      final MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
+      final MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
 
       positions.add(0L);
       wordNums.add(0L);
       final Iterator it = new Iterator(data, cardinality, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
@@ -17,14 +17,14 @@ package org.apache.lucene.util.packed;
  * limitations under the License.
  */
 
-import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
-
-import java.util.Arrays;
-
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
 
-/** Common functionality shared by {@link AppendingLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
+import java.util.Arrays;
+
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+
+/** Common functionality shared by {@link AppendingDeltaPackedLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
 abstract class AbstractAppendingLongBuffer {
 
   static final int MIN_PAGE_SIZE = 64;
@@ -33,21 +33,21 @@ abstract class AbstractAppendingLongBuffer {
   static final int MAX_PAGE_SIZE = 1 << 20;
 
   final int pageShift, pageMask;
-  long[] minValues;
-  PackedInts.Reader[] deltas;
-  private long deltasBytes;
+  PackedInts.Reader[] values;
+  private long valuesBytes;
   int valuesOff;
   long[] pending;
   int pendingOff;
+  float acceptableOverheadRatio;
 
-  AbstractAppendingLongBuffer(int initialBlockCount, int pageSize) {
-    minValues = new long[initialBlockCount];
-    deltas = new PackedInts.Reader[initialBlockCount];
+  AbstractAppendingLongBuffer(int initialBlockCount, int pageSize, float acceptableOverheadRatio) {
+    values = new PackedInts.Reader[initialBlockCount];
     pending = new long[pageSize];
     pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
     pageMask = pageSize - 1;
     valuesOff = 0;
    pendingOff = 0;
+    this.acceptableOverheadRatio = acceptableOverheadRatio;
   }
 
   final int pageSize() {
@@ -58,7 +58,7 @@
   public final long size() {
     long size = pendingOff;
     if (valuesOff > 0) {
-      size += deltas[valuesOff - 1].size();
+      size += values[valuesOff - 1].size();
     }
     if (valuesOff > 1) {
       size += (long) (valuesOff - 1) * pageSize();
@@ -73,12 +73,12 @@
     }
     if (pendingOff == pending.length) {
       // check size
-      if (deltas.length == valuesOff) {
+      if (values.length == valuesOff) {
        final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
        grow(newLength);
      }
      packPendingValues();
-      deltasBytes += deltas[valuesOff].ramBytesUsed();
+      valuesBytes += values[valuesOff].ramBytesUsed();
      ++valuesOff;
      // reset pending buffer
      pendingOff = 0;
@@ -87,8 +87,7 @@
   }
 
   void grow(int newBlockCount) {
-    minValues = Arrays.copyOf(minValues, newBlockCount);
-    deltas = Arrays.copyOf(deltas, newBlockCount);
+    values = Arrays.copyOf(values, newBlockCount);
   }
 
   abstract void packPendingValues();
@@ -101,11 +100,33 @@
     return get(block, element);
   }
 
+  /**
+   * Bulk get: read at least one and at most <code>len</code> longs starting
+   * from <code>index</code> into <code>arr[off:off+len]</code> and return
+   * the actual number of values that have been read.
+   */
+  public final int get(long index, long[] arr, int off, int len) {
+    assert len > 0 : "len must be > 0 (got " + len + ")";
+    assert index >= 0 && index < size();
+    assert off + len <= arr.length;
+
+    int block = (int) (index >> pageShift);
+    int element = (int) (index & pageMask);
+    return get(block, element, arr, off, len);
+  }
+
+
   abstract long get(int block, int element);
 
-  abstract Iterator iterator();
+  abstract int get(int block, int element, long[] arr, int off, int len);
 
-  abstract class Iterator {
+  /** Return an iterator over the values of this buffer. */
+  public Iterator iterator() {
+    return new Iterator();
+  }
+
+  final public class Iterator {
 
     long[] currentValues;
     int vOff, pOff;
@@ -117,12 +138,22 @@
         currentValues = pending;
         currentCount = pendingOff;
       } else {
-        currentValues = new long[deltas[0].size()];
+        currentValues = new long[values[0].size()];
         fillValues();
       }
     }
 
-    abstract void fillValues();
+    void fillValues() {
+      if (vOff == valuesOff) {
+        currentValues = pending;
+        currentCount = pendingOff;
+      } else {
+        currentCount = values[vOff].size();
+        for (int k = 0; k < currentCount; ) {
+          k += get(vOff, k, currentValues, k, currentCount - k);
+        }
+      }
+    }
 
     /** Whether or not there are remaining values. */
     public final boolean hasNext() {
@@ -149,33 +180,31 @@
 
   long baseRamBytesUsed() {
     return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
-        + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 3 arrays
+        + 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 2 arrays
         + 2 * RamUsageEstimator.NUM_BYTES_INT // the 2 offsets
         + 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
-        + RamUsageEstimator.NUM_BYTES_LONG; // deltasBytes
+        + RamUsageEstimator.NUM_BYTES_FLOAT // acceptable overhead
+        + RamUsageEstimator.NUM_BYTES_LONG; // valuesBytes
   }
 
-  /**
-   * Return the number of bytes used by this instance.
-   */
+  /** Return the number of bytes used by this instance. */
   public long ramBytesUsed() {
     // TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
     long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
         + (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
-        + RamUsageEstimator.sizeOf(minValues)
-        + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * deltas.length); // values
+        + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
 
-    return bytesUsed + deltasBytes;
+    return bytesUsed + valuesBytes;
   }
 
   /** Pack all pending values in this buffer. Subsequent calls to {@link #add(long)} will fail. */
   public void freeze() {
     if (pendingOff > 0) {
-      if (deltas.length == valuesOff) {
+      if (values.length == valuesOff) {
         grow(valuesOff + 1); // don't oversize!
       }
       packPendingValues();
-      deltasBytes += deltas[valuesOff].ramBytesUsed();
+      valuesBytes += values[valuesOff].ramBytesUsed();
       ++valuesOff;
       pendingOff = 0;
     }
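To illustrate the bulk-get contract defined above (each call reads at least one and at most len values and returns how many were actually read, typically stopping at a page boundary), a caller usually loops until the requested range is filled. A minimal, hypothetical sketch, not part of the patch (the readRange helper and class name are made up):

import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;

public class BulkReadLoop {
  /** Copies buf[start .. start+len) into dst[0 .. len), looping because a single
   *  bulk get may return fewer than len values. */
  static void readRange(AppendingDeltaPackedLongBuffer buf, long start, long[] dst, int len) {
    int copied = 0;
    while (copied < len) {
      int read = buf.get(start + copied, dst, copied, len - copied);
      copied += read; // read is always >= 1 per the bulk-get contract
    }
  }

  public static void main(String[] args) {
    AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
    for (int i = 0; i < 5000; i++) {
      buf.add(1000 + i % 10); // values close to each other compress well
    }
    long[] dst = new long[300];
    readRange(buf, 2000, dst, dst.length);
    System.out.println("dst[0]=" + dst[0] + " dst[299]=" + dst[299]);
  }
}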
@@ -0,0 +1,136 @@
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.RamUsageEstimator;

import java.util.Arrays;

/**
 * Utility class to buffer a list of signed longs in memory. This class only
 * supports appending and is optimized for the case where values are close to
 * each other.
 *
 * @lucene.internal
 */
public final class AppendingDeltaPackedLongBuffer extends AbstractAppendingLongBuffer {

  long[] minValues;

  /** Create {@link AppendingDeltaPackedLongBuffer}
   * @param initialPageCount        the initial number of pages
   * @param pageSize                the size of a single page
   * @param acceptableOverheadRatio an acceptable overhead ratio per value
   */
  public AppendingDeltaPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
    super(initialPageCount, pageSize, acceptableOverheadRatio);
    minValues = new long[values.length];
  }

  /**
   * Create an {@link AppendingDeltaPackedLongBuffer} with initialPageCount=16,
   * pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
   */
  public AppendingDeltaPackedLongBuffer() {
    this(16, 1024, PackedInts.DEFAULT);
  }

  /**
   * Create an {@link AppendingDeltaPackedLongBuffer} with initialPageCount=16,
   * pageSize=1024
   */
  public AppendingDeltaPackedLongBuffer(float acceptableOverheadRatio) {
    this(16, 1024, acceptableOverheadRatio);
  }

  @Override
  long get(int block, int element) {
    if (block == valuesOff) {
      return pending[element];
    } else if (values[block] == null) {
      return minValues[block];
    } else {
      return minValues[block] + values[block].get(element);
    }
  }

  @Override
  int get(int block, int element, long[] arr, int off, int len) {
    if (block == valuesOff) {
      int sysCopyToRead = Math.min(len, pendingOff - element);
      System.arraycopy(pending, element, arr, off, sysCopyToRead);
      return sysCopyToRead;
    } else {
      /* packed block */
      int read = values[block].get(element, arr, off, len);
      long d = minValues[block];
      for (int r = 0; r < read; r++, off++) {
        arr[off] += d;
      }
      return read;
    }
  }

  @Override
  void packPendingValues() {
    // compute max delta
    long minValue = pending[0];
    long maxValue = pending[0];
    for (int i = 1; i < pendingOff; ++i) {
      minValue = Math.min(minValue, pending[i]);
      maxValue = Math.max(maxValue, pending[i]);
    }
    final long delta = maxValue - minValue;

    minValues[valuesOff] = minValue;
    if (delta == 0) {
      values[valuesOff] = new PackedInts.NullReader(pendingOff);
    } else {
      // build a new packed reader
      final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
      for (int i = 0; i < pendingOff; ++i) {
        pending[i] -= minValue;
      }
      final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
      for (int i = 0; i < pendingOff; ) {
        i += mutable.set(i, pending, i, pendingOff - i);
      }
      values[valuesOff] = mutable;
    }
  }

  @Override
  void grow(int newBlockCount) {
    super.grow(newBlockCount);
    this.minValues = Arrays.copyOf(minValues, newBlockCount);
  }

  @Override
  long baseRamBytesUsed() {
    return super.baseRamBytesUsed()
        + RamUsageEstimator.NUM_BYTES_OBJECT_REF; // additional array
  }

  @Override
  public long ramBytesUsed() {
    return super.ramBytesUsed() + RamUsageEstimator.sizeOf(minValues);
  }

}
@@ -1,111 +0,0 @@
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Utility class to buffer a list of signed longs in memory. This class only
 * supports appending and is optimized for the case where values are close to
 * each other.
 * @lucene.internal
 */
public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {

  /** @param initialPageCount the initial number of pages
   *  @param pageSize         the size of a single page */
  public AppendingLongBuffer(int initialPageCount, int pageSize) {
    super(initialPageCount, pageSize);
  }

  /** Create an {@link AppendingLongBuffer} with initialPageCount=16 and
   *  pageSize=1024. */
  public AppendingLongBuffer() {
    this(16, 1024);
  }

  @Override
  long get(int block, int element) {
    if (block == valuesOff) {
      return pending[element];
    } else if (deltas[block] == null) {
      return minValues[block];
    } else {
      return minValues[block] + deltas[block].get(element);
    }
  }

  @Override
  void packPendingValues() {
    // compute max delta
    long minValue = pending[0];
    long maxValue = pending[0];
    for (int i = 1; i < pendingOff; ++i) {
      minValue = Math.min(minValue, pending[i]);
      maxValue = Math.max(maxValue, pending[i]);
    }
    final long delta = maxValue - minValue;

    minValues[valuesOff] = minValue;
    if (delta == 0) {
      deltas[valuesOff] = new PackedInts.NullReader(pendingOff);
    } else {
      // build a new packed reader
      final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
      for (int i = 0; i < pendingOff; ++i) {
        pending[i] -= minValue;
      }
      final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, PackedInts.COMPACT);
      for (int i = 0; i < pendingOff; ) {
        i += mutable.set(i, pending, i, pendingOff - i);
      }
      deltas[valuesOff] = mutable;
    }
  }

  /** Return an iterator over the values of this buffer. */
  @Override
  public Iterator iterator() {
    return new Iterator();
  }

  /** A long iterator. */
  public final class Iterator extends AbstractAppendingLongBuffer.Iterator {

    Iterator() {
      super();
    }

    @Override
    void fillValues() {
      if (vOff == valuesOff) {
        currentValues = pending;
        currentCount = pendingOff;
      } else {
        currentCount = deltas[vOff].size();
        for (int k = 0; k < currentCount; ) {
          k += deltas[vOff].get(k, currentValues, k, currentCount - k);
        }
        for (int k = 0; k < currentCount; ++k) {
          currentValues[k] += minValues[vOff];
        }
      }
    }

  }

}
@@ -0,0 +1,96 @@
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Utility class to buffer a list of signed longs in memory. This class only
 * supports appending and is optimized for non-negative numbers with a uniform distribution over a fixed (limited) range
 *
 * @lucene.internal
 */
public final class AppendingPackedLongBuffer extends AbstractAppendingLongBuffer {

  /** {@link AppendingPackedLongBuffer}
   * @param initialPageCount        the initial number of pages
   * @param pageSize                the size of a single page
   * @param acceptableOverheadRatio an acceptable overhead ratio per value
   */
  public AppendingPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
    super(initialPageCount, pageSize, acceptableOverheadRatio);
  }

  /**
   * Create an {@link AppendingPackedLongBuffer} with initialPageCount=16,
   * pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
   */
  public AppendingPackedLongBuffer() {
    this(16, 1024, PackedInts.DEFAULT);
  }

  /**
   * Create an {@link AppendingPackedLongBuffer} with initialPageCount=16,
   * pageSize=1024
   */
  public AppendingPackedLongBuffer(float acceptableOverheadRatio) {
    this(16, 1024, acceptableOverheadRatio);
  }

  @Override
  long get(int block, int element) {
    if (block == valuesOff) {
      return pending[element];
    } else {
      return values[block].get(element);
    }
  }

  @Override
  int get(int block, int element, long[] arr, int off, int len) {
    if (block == valuesOff) {
      int sysCopyToRead = Math.min(len, pendingOff - element);
      System.arraycopy(pending, element, arr, off, sysCopyToRead);
      return sysCopyToRead;
    } else {
      /* packed block */
      return values[block].get(element, arr, off, len);
    }
  }

  @Override
  void packPendingValues() {
    // compute max delta
    long minValue = pending[0];
    long maxValue = pending[0];
    for (int i = 1; i < pendingOff; ++i) {
      minValue = Math.min(minValue, pending[i]);
      maxValue = Math.max(maxValue, pending[i]);
    }

    // build a new packed reader
    final int bitsRequired = minValue < 0 ? 64 : PackedInts.bitsRequired(maxValue);
    final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
    for (int i = 0; i < pendingOff; ) {
      i += mutable.set(i, pending, i, pendingOff - i);
    }
    values[valuesOff] = mutable;

  }

}
@@ -17,14 +17,15 @@ package org.apache.lucene.util.packed;
  * limitations under the License.
  */
 
-import java.util.Arrays;
-
 import org.apache.lucene.util.RamUsageEstimator;
 
+import java.util.Arrays;
+
 /**
  * Utility class to buffer signed longs in memory, which is optimized for the
  * case where the sequence is monotonic, although it can encode any sequence of
  * arbitrary longs. It only supports appending.
+ *
  * @lucene.internal
  */
 public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuffer {
@@ -32,36 +33,77 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
   static long zigZagDecode(long n) {
     return ((n >>> 1) ^ -(n & 1));
   }
 
   static long zigZagEncode(long n) {
     return (n >> 63) ^ (n << 1);
   }
 
   float[] averages;
+  long[] minValues;
 
-  /** @param initialPageCount the initial number of pages
-   *  @param pageSize         the size of a single page */
-  public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize) {
-    super(initialPageCount, pageSize);
-    averages = new float[initialPageCount];
+  /**
+   * @param initialPageCount        the initial number of pages
+   * @param pageSize                the size of a single page
+   * @param acceptableOverheadRatio an acceptable overhead ratio per value
+   */
+  public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
+    super(initialPageCount, pageSize, acceptableOverheadRatio);
+    averages = new float[values.length];
+    minValues = new long[values.length];
   }
 
-  /** Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16
-   *  and pageSize=1024. */
+  /**
+   * Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16,
+   * pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
+   */
   public MonotonicAppendingLongBuffer() {
-    this(16, 1024);
+    this(16, 1024, PackedInts.DEFAULT);
   }
 
+  /**
+   * Create an {@link AppendingDeltaPackedLongBuffer} with initialPageCount=16,
+   * pageSize=1024
+   */
+  public MonotonicAppendingLongBuffer(float acceptableOverheadRatio) {
+    this(16, 1024, acceptableOverheadRatio);
+  }
+
+
   @Override
   long get(int block, int element) {
     if (block == valuesOff) {
       return pending[element];
     } else {
       final long base = minValues[block] + (long) (averages[block] * (long) element);
-      if (deltas[block] == null) {
+      if (values[block] == null) {
         return base;
       } else {
-        return base + zigZagDecode(deltas[block].get(element));
+        return base + zigZagDecode(values[block].get(element));
       }
     }
   }
 
+  @Override
+  int get(int block, int element, long[] arr, int off, int len) {
+    if (block == valuesOff) {
+      int sysCopyToRead = Math.min(len, pendingOff - element);
+      System.arraycopy(pending, element, arr, off, sysCopyToRead);
+      return sysCopyToRead;
+    } else {
+      if (values[block] == null) {
+        int toFill = Math.min(len, pending.length - element);
+        for (int r = 0; r < toFill; r++, off++, element++) {
+          arr[off] = minValues[block] + (long) (averages[block] * (long) element);
+        }
+        return toFill;
+      } else {
+        /* packed block */
+        int read = values[block].get(element, arr, off, len);
+        for (int r = 0; r < read; r++, off++, element++) {
+          arr[off] = minValues[block] + (long) (averages[block] * (long) element) + zigZagDecode(arr[off]);
+        }
+        return read;
+      }
+    }
+  }
+
@@ -70,6 +112,7 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
   void grow(int newBlockCount) {
     super.grow(newBlockCount);
     this.averages = Arrays.copyOf(averages, newBlockCount);
+    this.minValues = Arrays.copyOf(minValues, newBlockCount);
   }
 
   @Override
@@ -91,58 +134,27 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
       }
     }
     if (maxDelta == 0) {
-      deltas[valuesOff] = new PackedInts.NullReader(pendingOff);
+      values[valuesOff] = new PackedInts.NullReader(pendingOff);
     } else {
       final int bitsRequired = maxDelta < 0 ? 64 : PackedInts.bitsRequired(maxDelta);
-      final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, PackedInts.COMPACT);
+      final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
       for (int i = 0; i < pendingOff; ) {
         i += mutable.set(i, pending, i, pendingOff - i);
       }
-      deltas[valuesOff] = mutable;
+      values[valuesOff] = mutable;
     }
   }
 
-  /** Return an iterator over the values of this buffer. */
-  @Override
-  public Iterator iterator() {
-    return new Iterator();
-  }
-
-  /** A long iterator. */
-  public final class Iterator extends AbstractAppendingLongBuffer.Iterator {
-
-    Iterator() {
-      super();
-    }
-
-    @Override
-    void fillValues() {
-      if (vOff == valuesOff) {
-        currentValues = pending;
-        currentCount = pendingOff;
-      } else {
-        currentCount = deltas[vOff].size();
-        for (int k = 0; k < currentCount; ) {
-          k += deltas[vOff].get(k, currentValues, k, currentCount - k);
-        }
-        for (int k = 0; k < currentCount; ++k) {
-          currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k) + zigZagDecode(currentValues[k]);
-        }
-      }
-    }
-
-  }
-
   @Override
   long baseRamBytesUsed() {
     return super.baseRamBytesUsed()
-        + RamUsageEstimator.NUM_BYTES_OBJECT_REF; // the additional array
+        + 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // 2 additional arrays
   }
 
   @Override
   public long ramBytesUsed() {
     return super.ramBytesUsed()
-        + RamUsageEstimator.sizeOf(averages);
+        + RamUsageEstimator.sizeOf(averages) + RamUsageEstimator.sizeOf(minValues);
   }
 
 }
@@ -705,6 +705,9 @@ public class PackedInts {
 
     @Override
     public int get(int index, long[] arr, int off, int len) {
+      assert len > 0 : "len must be > 0 (got " + len + ")";
+      assert index >= 0 && index < valueCount;
+      len = Math.min(len, valueCount - index);
       Arrays.fill(arr, off, off + len, 0);
       return len;
     }
@@ -23,7 +23,7 @@ import org.apache.lucene.util.packed.PackedInts.Mutable;
 /**
  * A {@link PagedGrowableWriter}. This class slices data into fixed-size blocks
  * which have independent numbers of bits per value and grow on-demand.
- * <p>You should use this class instead of {@link AppendingLongBuffer} only when
+ * <p>You should use this class instead of the {@link AbstractAppendingLongBuffer} related ones only when
  * you need random write-access. Otherwise this class will likely be slower and
  * less memory-efficient.
  * @lucene.internal
@@ -50,16 +50,19 @@
     <li><b>{@link org.apache.lucene.util.packed.PagedGrowableWriter}</b><ul>
         <li>Slices data into fixed-size blocks stored in GrowableWriters.</li>
         <li>Supports more than 2B values.</li>
-        <li>You should use AppendingLongBuffer instead if you don't need random write access.</li>
+        <li>You should use Appending(Delta)PackedLongBuffer instead if you don't need random write access.</li>
     </ul></li>
-    <li><b>{@link org.apache.lucene.util.packed.AppendingLongBuffer}</b><ul>
+    <li><b>{@link org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer}</b><ul>
         <li>Can store any sequence of longs.</li>
         <li>Compression is good when values are close to each other.</li>
         <li>Supports random reads, but only sequential writes.</li>
         <li>Can address up to 2^42 values.</li>
     </ul></li>
+    <li><b>{@link org.apache.lucene.util.packed.AppendingPackedLongBuffer}</b><ul>
+        <li>Same as AppendingDeltaPackedLongBuffer but assumes values are 0-based.</li>
+    </ul></li>
     <li><b>{@link org.apache.lucene.util.packed.MonotonicAppendingLongBuffer}</b><ul>
-        <li>Same as AppendingLongBuffer except that compression is good when the stream is a succession of affine functions.</li>
+        <li>Same as AppendingDeltaPackedLongBuffer except that compression is good when the stream is a succession of affine functions.</li>
     </ul></li>
 </ul>
 
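As a rough illustration of when the monotonic variant described in the package docs above pays off (a made-up example, not from the commit): storing monotonically increasing start offsets, where each page only needs to encode small, zigzag-encoded deviations from an average slope.

import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;

public class MonotonicBufferExample {
  public static void main(String[] args) {
    // Start offsets of variable-length records: a monotonically increasing sequence.
    MonotonicAppendingLongBuffer offsets = new MonotonicAppendingLongBuffer(16, 1024, PackedInts.COMPACT);
    long offset = 0;
    for (int doc = 0; doc < 1000; doc++) {
      offsets.add(offset);
      offset += 100 + (doc % 7); // record lengths stay close to an average of ~103 bytes
    }
    // Random reads are supported; only sequential writes are allowed.
    System.out.println("offset of doc 500: " + offsets.get(500));
  }
}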
@@ -540,6 +540,27 @@ public class TestPackedInts extends LuceneTestCase {
     }
   }
 
+  public void testPackedIntsNull() {
+    int size = _TestUtil.nextInt(random(), 0, 256);
+    Reader packedInts = new PackedInts.NullReader(size);
+    assertEquals(0, packedInts.get(_TestUtil.nextInt(random(), 0, size - 1)));
+
+    long[] arr = new long[size + 10];
+    int r;
+    Arrays.fill(arr, 1);
+    r = packedInts.get(0, arr, 0, size - 1);
+    assertEquals(size - 1, r);
+    for (r--; r >= 0; r--) {
+      assertEquals(0, arr[r]);
+    }
+    Arrays.fill(arr, 1);
+    r = packedInts.get(10, arr, 0, size + 10);
+    assertEquals(size - 10, r);
+    for (int i = 0; i < size - 10; i++) {
+      assertEquals(0, arr[i]);
+    }
+
+  }
+
   public void testBulkGet() {
     final int valueCount = 1111;
     final int index = random().nextInt(valueCount);
@@ -669,8 +690,8 @@
     PagedGrowableWriter writer = new PagedGrowableWriter(0, pageSize, _TestUtil.nextInt(random(), 1, 64), random().nextFloat());
     assertEquals(0, writer.size());
 
-    // compare against AppendingLongBuffer
-    AppendingLongBuffer buf = new AppendingLongBuffer();
+    // compare against AppendingDeltaPackedLongBuffer
+    AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
     int size = random().nextInt(1000000);
     long max = 5;
     for (int i = 0; i < size; ++i) {
@@ -720,8 +741,8 @@
     PagedMutable writer = new PagedMutable(0, pageSize, bitsPerValue, random().nextFloat() / 2);
     assertEquals(0, writer.size());
 
-    // compare against AppendingLongBuffer
-    AppendingLongBuffer buf = new AppendingLongBuffer();
+    // compare against AppendingDeltaPackedLongBuffer
+    AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
     int size = random().nextInt(1000000);
 
     for (int i = 0; i < size; ++i) {
@@ -924,25 +945,46 @@
     return true;
   }
 
+  enum DataType {
+    PACKED,
+    DELTA_PACKED,
+    MONOTONIC
+  }
+
+
   public void testAppendingLongBuffer() {
+
     final long[] arr = new long[RandomInts.randomIntBetween(random(), 1, 1000000)];
-    for (int bpv : new int[] {0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
-      for (boolean monotonic : new boolean[] {true, false}) {
+    float[] ratioOptions = new float[]{PackedInts.DEFAULT, PackedInts.COMPACT, PackedInts.FAST};
+    for (int bpv : new int[]{0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
+      for (DataType dataType : DataType.values()) {
         final int pageSize = 1 << _TestUtil.nextInt(random(), 6, 20);
         final int initialPageCount = _TestUtil.nextInt(random(), 0, 16);
+        float acceptableOverheadRatio = ratioOptions[_TestUtil.nextInt(random(), 0, ratioOptions.length - 1)];
         AbstractAppendingLongBuffer buf;
         final int inc;
-        if (monotonic) {
-          buf = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
-          inc = _TestUtil.nextInt(random(), -1000, 1000);
-        } else {
-          buf = new AppendingLongBuffer(initialPageCount, pageSize);
-          inc = 0;
+        switch (dataType) {
+          case PACKED:
+            buf = new AppendingPackedLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+            inc = 0;
+            break;
+          case DELTA_PACKED:
+            buf = new AppendingDeltaPackedLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+            inc = 0;
+            break;
+          case MONOTONIC:
+            buf = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+            inc = _TestUtil.nextInt(random(), -1000, 1000);
+            break;
+          default:
+            throw new RuntimeException("added a type and forgot to add it here?");
+
         }
 
         if (bpv == 0) {
           arr[0] = random().nextLong();
           for (int i = 1; i < arr.length; ++i) {
-            arr[i] = arr[i-1] + inc;
+            arr[i] = arr[i - 1] + inc;
           }
         } else if (bpv == 64) {
           for (int i = 0; i < arr.length; ++i) {
@@ -954,6 +996,7 @@
             arr[i] = minValue + inc * i + random().nextLong() & PackedInts.maxValue(bpv); // _TestUtil.nextLong is too slow
           }
         }
+
         for (int i = 0; i < arr.length; ++i) {
           buf.add(arr[i]);
         }
@@ -966,6 +1009,11 @@
           }
         }
         assertEquals(arr.length, buf.size());
+
+        for (int i = 0; i < arr.length; ++i) {
+          assertEquals(arr[i], buf.get(i));
+        }
+
         final AbstractAppendingLongBuffer.Iterator it = buf.iterator();
         for (int i = 0; i < arr.length; ++i) {
           if (random().nextBoolean()) {
@@ -974,11 +1022,27 @@
           assertEquals(arr[i], it.next());
         }
         assertFalse(it.hasNext());
 
-        for (int i = 0; i < arr.length; ++i) {
-          assertEquals(arr[i], buf.get(i));
-        }
+        long[] target = new long[arr.length + 1024]; // check the request for more is OK.
+        for (int i = 0; i < arr.length; i += _TestUtil.nextInt(random(), 0, 10000)) {
+          int lenToRead = random().nextInt(buf.pageSize() * 2) + 1;
+          lenToRead = Math.min(lenToRead, target.length - i);
+          int lenToCheck = Math.min(lenToRead, arr.length - i);
+          int off = i;
+          while (off < arr.length && lenToRead > 0) {
+            int read = buf.get(off, target, off, lenToRead);
+            assertTrue(read > 0);
+            assertTrue(read <= lenToRead);
+            lenToRead -= read;
+            off += read;
+          }
+
+          for (int j = 0; j < lenToCheck; j++) {
+            assertEquals(arr[j + i], target[j + i]);
+          }
+        }
+
 
         final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
         final long computedBytesUsed = buf.ramBytesUsed();
         assertEquals(expectedBytesUsed, computedBytesUsed);