mirror of https://github.com/apache/lucene.git
LUCENE-4062: add new aligned packed bits impls for faster performance
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1342751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f4819005cf
commit
6a4a717220
|
@ -922,6 +922,11 @@ Optimizations
|
||||||
and few general improvements to DirectoryTaxonomyWriter.
|
and few general improvements to DirectoryTaxonomyWriter.
|
||||||
(Shai Erera, Gilad Barkai)
|
(Shai Erera, Gilad Barkai)
|
||||||
|
|
||||||
|
* LUCENE-4062: Add new aligned packed bits impls for faster lookup
|
||||||
|
performance; add float acceptableOverheadRatio to getWriter and
|
||||||
|
getMutable API to give packed ints freedom to pick faster
|
||||||
|
implementations (Adrien Grand via Mike McCandless)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-2803: The FieldCache can miss values if an entry for a reader
|
* LUCENE-2803: The FieldCache can miss values if an entry for a reader
|
||||||
|
|
|
@ -328,8 +328,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
|
||||||
// we'd have to try @ fewer bits and then grow
|
// we'd have to try @ fewer bits and then grow
|
||||||
// if we overflowed it.
|
// if we overflowed it.
|
||||||
|
|
||||||
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue());
|
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
|
||||||
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue());
|
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
|
||||||
|
|
||||||
termsDictOffsets = termsDictOffsetsM;
|
termsDictOffsets = termsDictOffsetsM;
|
||||||
termOffsets = termOffsetsM;
|
termOffsets = termOffsetsM;
|
||||||
|
|
|
@ -183,7 +183,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
// write primary terms dict offsets
|
// write primary terms dict offsets
|
||||||
packedIndexStart = out.getFilePointer();
|
packedIndexStart = out.getFilePointer();
|
||||||
|
|
||||||
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer));
|
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT);
|
||||||
|
|
||||||
// relative to our indexStart
|
// relative to our indexStart
|
||||||
long upto = 0;
|
long upto = 0;
|
||||||
|
@ -196,7 +196,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
packedOffsetsStart = out.getFilePointer();
|
packedOffsetsStart = out.getFilePointer();
|
||||||
|
|
||||||
// write offsets into the byte[] terms
|
// write offsets into the byte[] terms
|
||||||
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength));
|
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength), PackedInts.DEFAULT);
|
||||||
upto = 0;
|
upto = 0;
|
||||||
for(int i=0;i<numIndexTerms;i++) {
|
for(int i=0;i<numIndexTerms;i++) {
|
||||||
w.add(upto);
|
w.add(upto);
|
||||||
|
|
|
@ -74,7 +74,7 @@ class TermInfosReaderIndex {
|
||||||
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
|
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
|
||||||
|
|
||||||
final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
|
final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
|
||||||
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, false);
|
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);
|
||||||
|
|
||||||
String currentField = null;
|
String currentField = null;
|
||||||
List<String> fieldStrs = new ArrayList<String>();
|
List<String> fieldStrs = new ArrayList<String>();
|
||||||
|
|
|
@ -115,17 +115,19 @@ public final class Bytes {
|
||||||
* {@link Writer}. A call to {@link Writer#finish(int)} will release
|
* {@link Writer}. A call to {@link Writer#finish(int)} will release
|
||||||
* all internally used resources and frees the memory tracking
|
* all internally used resources and frees the memory tracking
|
||||||
* reference.
|
* reference.
|
||||||
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
|
* @param acceptableOverheadRatio
|
||||||
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
|
* how to trade space for speed. This option is only applicable for
|
||||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
|
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
|
||||||
|
* {@link Type#BYTES_VAR_SORTED}.
|
||||||
* @param context I/O Context
|
* @param context I/O Context
|
||||||
* @return a new {@link Writer} instance
|
* @return a new {@link Writer} instance
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* if the files for the writer can not be created.
|
* if the files for the writer can not be created.
|
||||||
|
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
|
||||||
*/
|
*/
|
||||||
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
|
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
|
||||||
boolean fixedSize, Comparator<BytesRef> sortComparator,
|
boolean fixedSize, Comparator<BytesRef> sortComparator,
|
||||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
|
Counter bytesUsed, IOContext context, float acceptableOverheadRatio)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// TODO -- i shouldn't have to specify fixed? can
|
// TODO -- i shouldn't have to specify fixed? can
|
||||||
// track itself & do the write thing at write time?
|
// track itself & do the write thing at write time?
|
||||||
|
@ -139,7 +141,7 @@ public final class Bytes {
|
||||||
} else if (mode == Mode.DEREF) {
|
} else if (mode == Mode.DEREF) {
|
||||||
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
||||||
} else if (mode == Mode.SORTED) {
|
} else if (mode == Mode.SORTED) {
|
||||||
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
|
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (mode == Mode.STRAIGHT) {
|
if (mode == Mode.STRAIGHT) {
|
||||||
|
@ -147,7 +149,7 @@ public final class Bytes {
|
||||||
} else if (mode == Mode.DEREF) {
|
} else if (mode == Mode.DEREF) {
|
||||||
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
||||||
} else if (mode == Mode.SORTED) {
|
} else if (mode == Mode.SORTED) {
|
||||||
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
|
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,32 +384,32 @@ public final class Bytes {
|
||||||
protected int lastDocId = -1;
|
protected int lastDocId = -1;
|
||||||
protected int[] docToEntry;
|
protected int[] docToEntry;
|
||||||
protected final BytesRefHash hash;
|
protected final BytesRefHash hash;
|
||||||
protected final boolean fasterButMoreRam;
|
protected final float acceptableOverheadRatio;
|
||||||
protected long maxBytes = 0;
|
protected long maxBytes = 0;
|
||||||
|
|
||||||
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
|
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
|
||||||
int codecVersion, Counter bytesUsed, IOContext context, Type type)
|
int codecVersion, Counter bytesUsed, IOContext context, Type type)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
|
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
|
||||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
|
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, PackedInts.DEFAULT, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
|
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
|
||||||
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
|
int codecVersion, Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
|
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
|
||||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
|
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, acceptableOverheadRatio, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
|
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
|
||||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
|
Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) throws IOException {
|
||||||
super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
|
super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
|
||||||
hash = new BytesRefHash(new ByteBlockPool(allocator),
|
hash = new BytesRefHash(new ByteBlockPool(allocator),
|
||||||
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||||
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||||
docToEntry = new int[1];
|
docToEntry = new int[1];
|
||||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||||
this.fasterButMoreRam = fasterButMoreRam;
|
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
|
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
|
||||||
|
@ -506,7 +508,7 @@ public final class Bytes {
|
||||||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||||
long maxValue, int[] addresses, int[] toEntry) throws IOException {
|
long maxValue, int[] addresses, int[] toEntry) throws IOException {
|
||||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||||
bitsRequired(maxValue));
|
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
|
||||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||||
: docCount;
|
: docCount;
|
||||||
assert toEntry.length >= limit -1;
|
assert toEntry.length >= limit -1;
|
||||||
|
@ -530,7 +532,7 @@ public final class Bytes {
|
||||||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||||
long maxValue, long[] addresses, int[] toEntry) throws IOException {
|
long maxValue, long[] addresses, int[] toEntry) throws IOException {
|
||||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||||
bitsRequired(maxValue));
|
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
|
||||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||||
: docCount;
|
: docCount;
|
||||||
assert toEntry.length >= limit -1;
|
assert toEntry.length >= limit -1;
|
||||||
|
@ -551,11 +553,6 @@ public final class Bytes {
|
||||||
w.finish();
|
w.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int bitsRequired(long maxValue){
|
|
||||||
return fasterButMoreRam ?
|
|
||||||
PackedInts.getNextFixedSize(PackedInts.bitsRequired(maxValue)) : PackedInts.bitsRequired(maxValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static abstract class BytesSortedSourceBase extends SortedSource {
|
static abstract class BytesSortedSourceBase extends SortedSource {
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base class for PerDocConsumer implementations
|
* Abstract base class for PerDocConsumer implementations
|
||||||
|
@ -41,7 +42,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
||||||
protected final String segmentName;
|
protected final String segmentName;
|
||||||
private final Counter bytesUsed;
|
private final Counter bytesUsed;
|
||||||
protected final IOContext context;
|
protected final IOContext context;
|
||||||
private final boolean fasterButMoreRam;
|
private final float acceptableOverheadRatio;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filename extension for index files
|
* Filename extension for index files
|
||||||
|
@ -57,20 +58,22 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
||||||
* @param state The state to initiate a {@link PerDocConsumer} instance
|
* @param state The state to initiate a {@link PerDocConsumer} instance
|
||||||
*/
|
*/
|
||||||
protected DocValuesWriterBase(PerDocWriteState state) {
|
protected DocValuesWriterBase(PerDocWriteState state) {
|
||||||
this(state, true);
|
this(state, PackedInts.FAST);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param state The state to initiate a {@link PerDocConsumer} instance
|
* @param state The state to initiate a {@link PerDocConsumer} instance
|
||||||
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
|
* @param acceptableOverheadRatio
|
||||||
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
|
* how to trade space for speed. This option is only applicable for
|
||||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
|
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
|
||||||
|
* {@link Type#BYTES_VAR_SORTED}.
|
||||||
|
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
|
||||||
*/
|
*/
|
||||||
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) {
|
protected DocValuesWriterBase(PerDocWriteState state, float acceptableOverheadRatio) {
|
||||||
this.segmentName = state.segmentName;
|
this.segmentName = state.segmentName;
|
||||||
this.bytesUsed = state.bytesUsed;
|
this.bytesUsed = state.bytesUsed;
|
||||||
this.context = state.context;
|
this.context = state.context;
|
||||||
this.fasterButMoreRam = fasterButMoreRam;
|
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Directory getDirectory() throws IOException;
|
protected abstract Directory getDirectory() throws IOException;
|
||||||
|
@ -83,7 +86,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
||||||
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
|
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
|
||||||
return Writer.create(valueType,
|
return Writer.create(valueType,
|
||||||
PerDocProducerBase.docValuesId(segmentName, field.number),
|
PerDocProducerBase.docValuesId(segmentName, field.number),
|
||||||
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
|
getDirectory(), getComparator(), bytesUsed, context, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -58,8 +58,8 @@ class FixedSortedBytesImpl {
|
||||||
private final Comparator<BytesRef> comp;
|
private final Comparator<BytesRef> comp;
|
||||||
|
|
||||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
|
||||||
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
|
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_FIXED_SORTED);
|
||||||
this.comp = comp;
|
this.comp = comp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ class FixedSortedBytesImpl {
|
||||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||||
idxOut.writeInt(maxOrd);
|
idxOut.writeInt(maxOrd);
|
||||||
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
|
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
|
||||||
PackedInts.bitsRequired(maxOrd));
|
PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
|
||||||
for (SortedSourceSlice slice : slices) {
|
for (SortedSourceSlice slice : slices) {
|
||||||
slice.writeOrds(ordsWriter);
|
slice.writeOrds(ordsWriter);
|
||||||
}
|
}
|
||||||
|
|
|
@ -103,7 +103,7 @@ class PackedIntValues {
|
||||||
: ++maxValue - minValue;
|
: ++maxValue - minValue;
|
||||||
datOut.writeLong(defaultValue);
|
datOut.writeLong(defaultValue);
|
||||||
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
|
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
|
||||||
PackedInts.bitsRequired(maxValue - minValue));
|
PackedInts.bitsRequired(maxValue - minValue), PackedInts.DEFAULT);
|
||||||
for (int i = 0; i < lastDocID + 1; i++) {
|
for (int i = 0; i < lastDocID + 1; i++) {
|
||||||
set(bytesRef, i);
|
set(bytesRef, i);
|
||||||
byte[] bytes = bytesRef.bytes;
|
byte[] bytes = bytesRef.bytes;
|
||||||
|
|
|
@ -60,8 +60,8 @@ final class VarSortedBytesImpl {
|
||||||
private final Comparator<BytesRef> comp;
|
private final Comparator<BytesRef> comp;
|
||||||
|
|
||||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
|
||||||
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
|
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_VAR_SORTED);
|
||||||
this.comp = comp;
|
this.comp = comp;
|
||||||
size = 0;
|
size = 0;
|
||||||
}
|
}
|
||||||
|
@ -83,7 +83,7 @@ final class VarSortedBytesImpl {
|
||||||
|
|
||||||
idxOut.writeLong(maxBytes);
|
idxOut.writeLong(maxBytes);
|
||||||
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
|
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
|
||||||
PackedInts.bitsRequired(maxBytes));
|
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
|
||||||
offsetWriter.add(0);
|
offsetWriter.add(0);
|
||||||
for (int i = 0; i < maxOrd; i++) {
|
for (int i = 0; i < maxOrd; i++) {
|
||||||
offsetWriter.add(offsets[i]);
|
offsetWriter.add(offsets[i]);
|
||||||
|
@ -91,7 +91,7 @@ final class VarSortedBytesImpl {
|
||||||
offsetWriter.finish();
|
offsetWriter.finish();
|
||||||
|
|
||||||
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
|
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
|
||||||
PackedInts.bitsRequired(maxOrd-1));
|
PackedInts.bitsRequired(maxOrd-1), PackedInts.DEFAULT);
|
||||||
for (SortedSourceSlice slice : slices) {
|
for (SortedSourceSlice slice : slices) {
|
||||||
slice.writeOrds(ordsWriter);
|
slice.writeOrds(ordsWriter);
|
||||||
}
|
}
|
||||||
|
@ -127,7 +127,7 @@ final class VarSortedBytesImpl {
|
||||||
// total bytes of data
|
// total bytes of data
|
||||||
idxOut.writeLong(maxBytes);
|
idxOut.writeLong(maxBytes);
|
||||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
|
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
|
||||||
bitsRequired(maxBytes));
|
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
|
||||||
// first dump bytes data, recording index & write offset as
|
// first dump bytes data, recording index & write offset as
|
||||||
// we go
|
// we go
|
||||||
final BytesRef spare = new BytesRef();
|
final BytesRef spare = new BytesRef();
|
||||||
|
|
|
@ -198,7 +198,7 @@ class VarStraightBytesImpl {
|
||||||
if (lastDocID == -1) {
|
if (lastDocID == -1) {
|
||||||
idxOut.writeVLong(0);
|
idxOut.writeVLong(0);
|
||||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
||||||
PackedInts.bitsRequired(0));
|
PackedInts.bitsRequired(0), PackedInts.DEFAULT);
|
||||||
// docCount+1 so we write sentinel
|
// docCount+1 so we write sentinel
|
||||||
for (int i = 0; i < docCount+1; i++) {
|
for (int i = 0; i < docCount+1; i++) {
|
||||||
w.add(0);
|
w.add(0);
|
||||||
|
@ -208,7 +208,7 @@ class VarStraightBytesImpl {
|
||||||
fill(docCount, address);
|
fill(docCount, address);
|
||||||
idxOut.writeVLong(address);
|
idxOut.writeVLong(address);
|
||||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
||||||
PackedInts.bitsRequired(address));
|
PackedInts.bitsRequired(address), PackedInts.DEFAULT);
|
||||||
for (int i = 0; i < docCount; i++) {
|
for (int i = 0; i < docCount; i++) {
|
||||||
w.add(docToAddress[i]);
|
w.add(docToAddress[i]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract API for per-document stored primitive values of type <tt>byte[]</tt>
|
* Abstract API for per-document stored primitive values of type <tt>byte[]</tt>
|
||||||
|
@ -77,14 +78,16 @@ abstract class Writer extends DocValuesConsumer {
|
||||||
* the {@link Directory} to create the files from.
|
* the {@link Directory} to create the files from.
|
||||||
* @param bytesUsed
|
* @param bytesUsed
|
||||||
* a byte-usage tracking reference
|
* a byte-usage tracking reference
|
||||||
* @param fasterButMoreRam Whether the space used for packed ints should be rounded up for higher lookup performance.
|
* @param acceptableOverheadRatio
|
||||||
* Currently this parameter only applies for types {@link Type#BYTES_VAR_SORTED}
|
* how to trade space for speed. This option is only applicable for
|
||||||
* and {@link Type#BYTES_FIXED_SORTED}.
|
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
|
||||||
|
* {@link Type#BYTES_VAR_SORTED}.
|
||||||
* @return a new {@link Writer} instance for the given {@link Type}
|
* @return a new {@link Writer} instance for the given {@link Type}
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
|
* @see PackedInts#getReader(org.apache.lucene.store.DataInput, float)
|
||||||
*/
|
*/
|
||||||
public static DocValuesConsumer create(Type type, String id, Directory directory,
|
public static DocValuesConsumer create(Type type, String id, Directory directory,
|
||||||
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
|
||||||
if (comp == null) {
|
if (comp == null) {
|
||||||
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
|
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
@ -101,22 +104,22 @@ abstract class Writer extends DocValuesConsumer {
|
||||||
return Floats.getWriter(directory, id, bytesUsed, context, type);
|
return Floats.getWriter(directory, id, bytesUsed, context, type);
|
||||||
case BYTES_FIXED_STRAIGHT:
|
case BYTES_FIXED_STRAIGHT:
|
||||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
|
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
|
||||||
bytesUsed, context, fasterButMoreRam);
|
bytesUsed, context, acceptableOverheadRatio);
|
||||||
case BYTES_FIXED_DEREF:
|
case BYTES_FIXED_DEREF:
|
||||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
|
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
|
||||||
bytesUsed, context, fasterButMoreRam);
|
bytesUsed, context, acceptableOverheadRatio);
|
||||||
case BYTES_FIXED_SORTED:
|
case BYTES_FIXED_SORTED:
|
||||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
|
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
|
||||||
bytesUsed, context, fasterButMoreRam);
|
bytesUsed, context, acceptableOverheadRatio);
|
||||||
case BYTES_VAR_STRAIGHT:
|
case BYTES_VAR_STRAIGHT:
|
||||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
|
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
|
||||||
bytesUsed, context, fasterButMoreRam);
|
bytesUsed, context, acceptableOverheadRatio);
|
||||||
case BYTES_VAR_DEREF:
|
case BYTES_VAR_DEREF:
|
||||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
|
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
|
||||||
bytesUsed, context, fasterButMoreRam);
|
bytesUsed, context, acceptableOverheadRatio);
|
||||||
case BYTES_VAR_SORTED:
|
case BYTES_VAR_SORTED:
|
||||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
|
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
|
||||||
bytesUsed, context, fasterButMoreRam);
|
bytesUsed, context, acceptableOverheadRatio);
|
||||||
default:
|
default:
|
||||||
throw new IllegalArgumentException("Unknown Values: " + type);
|
throw new IllegalArgumentException("Unknown Values: " + type);
|
||||||
}
|
}
|
||||||
|
|
|
@ -494,7 +494,7 @@ public interface FieldCache {
|
||||||
* faster lookups (default is "true"). Note that the
|
* faster lookups (default is "true"). Note that the
|
||||||
* first call for a given reader and field "wins",
|
* first call for a given reader and field "wins",
|
||||||
* subsequent calls will share the same cache entry. */
|
* subsequent calls will share the same cache entry. */
|
||||||
public DocTerms getTerms (AtomicReader reader, String field, boolean fasterButMoreRAM)
|
public DocTerms getTerms (AtomicReader reader, String field, float acceptableOverheadRatio)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
/** Returned by {@link #getTermsIndex} */
|
/** Returned by {@link #getTermsIndex} */
|
||||||
|
@ -571,7 +571,7 @@ public interface FieldCache {
|
||||||
* faster lookups (default is "true"). Note that the
|
* faster lookups (default is "true"). Note that the
|
||||||
* first call for a given reader and field "wins",
|
* first call for a given reader and field "wins",
|
||||||
* subsequent calls will share the same cache entry. */
|
* subsequent calls will share the same cache entry. */
|
||||||
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, boolean fasterButMoreRAM)
|
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, float acceptableOverheadRatio)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1071,14 +1071,12 @@ class FieldCacheImpl implements FieldCache {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true;
|
|
||||||
|
|
||||||
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
|
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
|
||||||
return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
|
return getTermsIndex(reader, field, PackedInts.FAST);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException {
|
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
|
||||||
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false);
|
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static class DocTermsIndexCache extends Cache {
|
static class DocTermsIndexCache extends Cache {
|
||||||
|
@ -1092,7 +1090,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
|
|
||||||
Terms terms = reader.terms(entryKey.field);
|
Terms terms = reader.terms(entryKey.field);
|
||||||
|
|
||||||
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
|
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
|
||||||
|
|
||||||
final PagedBytes bytes = new PagedBytes(15);
|
final PagedBytes bytes = new PagedBytes(15);
|
||||||
|
|
||||||
|
@ -1142,8 +1140,8 @@ class FieldCacheImpl implements FieldCache {
|
||||||
startNumUniqueTerms = 1;
|
startNumUniqueTerms = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
|
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
|
||||||
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, fasterButMoreRAM);
|
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
|
||||||
|
|
||||||
// 0 is reserved for "unset"
|
// 0 is reserved for "unset"
|
||||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||||
|
@ -1219,11 +1217,11 @@ class FieldCacheImpl implements FieldCache {
|
||||||
// TODO: this if DocTermsIndex was already created, we
|
// TODO: this if DocTermsIndex was already created, we
|
||||||
// should share it...
|
// should share it...
|
||||||
public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
|
public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
|
||||||
return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
|
return getTerms(reader, field, PackedInts.FAST);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DocTerms getTerms(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException {
|
public DocTerms getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
|
||||||
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false);
|
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class DocTermsCache extends Cache {
|
static final class DocTermsCache extends Cache {
|
||||||
|
@ -1237,7 +1235,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
|
|
||||||
Terms terms = reader.terms(entryKey.field);
|
Terms terms = reader.terms(entryKey.field);
|
||||||
|
|
||||||
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
|
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
|
||||||
|
|
||||||
final int termCountHardLimit = reader.maxDoc();
|
final int termCountHardLimit = reader.maxDoc();
|
||||||
|
|
||||||
|
@ -1268,7 +1266,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
startBPV = 1;
|
startBPV = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);
|
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), acceptableOverheadRatio);
|
||||||
|
|
||||||
// pointer==0 means not set
|
// pointer==0 means not set
|
||||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
|
||||||
|
final class DirectPacked64SingleBlockReader extends PackedInts.ReaderImpl {
|
||||||
|
|
||||||
|
private final IndexInput in;
|
||||||
|
private final long startPointer;
|
||||||
|
private final int valuesPerBlock;
|
||||||
|
private final long mask;
|
||||||
|
|
||||||
|
DirectPacked64SingleBlockReader(int bitsPerValue, int valueCount,
|
||||||
|
IndexInput in) {
|
||||||
|
super(valueCount, bitsPerValue);
|
||||||
|
this.in = in;
|
||||||
|
startPointer = in.getFilePointer();
|
||||||
|
valuesPerBlock = 64 / bitsPerValue;
|
||||||
|
mask = ~(~0L << bitsPerValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long get(int index) {
|
||||||
|
final int blockOffset = index / valuesPerBlock;
|
||||||
|
final long skip = ((long) blockOffset) << 3;
|
||||||
|
try {
|
||||||
|
in.seek(startPointer + skip);
|
||||||
|
|
||||||
|
long block = in.readLong();
|
||||||
|
final int offsetInBlock = index % valuesPerBlock;
|
||||||
|
return (block >>> (offsetInBlock * bitsPerValue)) & mask;
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new IllegalStateException("failed", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/* Reads directly from disk on each get */
|
/* Reads directly from disk on each get */
|
||||||
final class DirectReader implements PackedInts.Reader {
|
final class DirectPackedReader extends PackedInts.ReaderImpl {
|
||||||
private final IndexInput in;
|
private final IndexInput in;
|
||||||
private final long startPointer;
|
private final long startPointer;
|
||||||
private final int bitsPerValue;
|
|
||||||
private final int valueCount;
|
|
||||||
|
|
||||||
private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
|
private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
|
||||||
private static final int MOD_MASK = Packed64.MOD_MASK;
|
private static final int MOD_MASK = Packed64.MOD_MASK;
|
||||||
|
@ -34,10 +32,9 @@ final class DirectReader implements PackedInts.Reader {
|
||||||
// masks[n-1] masks for bottom n bits
|
// masks[n-1] masks for bottom n bits
|
||||||
private final long[] masks;
|
private final long[] masks;
|
||||||
|
|
||||||
public DirectReader(int bitsPerValue, int valueCount, IndexInput in)
|
public DirectPackedReader(int bitsPerValue, int valueCount, IndexInput in)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this.valueCount = valueCount;
|
super(valueCount, bitsPerValue);
|
||||||
this.bitsPerValue = bitsPerValue;
|
|
||||||
this.in = in;
|
this.in = in;
|
||||||
|
|
||||||
long v = 1;
|
long v = 1;
|
||||||
|
@ -50,26 +47,6 @@ final class DirectReader implements PackedInts.Reader {
|
||||||
startPointer = in.getFilePointer();
|
startPointer = in.getFilePointer();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getBitsPerValue() {
|
|
||||||
return bitsPerValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return valueCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasArray() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object getArray() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long get(int index) {
|
public long get(int index) {
|
||||||
final long majorBitPos = (long)index * bitsPerValue;
|
final long majorBitPos = (long)index * bitsPerValue;
|
|
@ -28,22 +28,14 @@ public class GrowableWriter implements PackedInts.Mutable {
|
||||||
|
|
||||||
private long currentMaxValue;
|
private long currentMaxValue;
|
||||||
private PackedInts.Mutable current;
|
private PackedInts.Mutable current;
|
||||||
private final boolean roundFixedSize;
|
private final float acceptableOverheadRatio;
|
||||||
|
|
||||||
public GrowableWriter(int startBitsPerValue, int valueCount, boolean roundFixedSize) {
|
public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
|
||||||
this.roundFixedSize = roundFixedSize;
|
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||||
current = PackedInts.getMutable(valueCount, getSize(startBitsPerValue));
|
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
|
||||||
currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
|
currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
private final int getSize(int bpv) {
|
|
||||||
if (roundFixedSize) {
|
|
||||||
return PackedInts.getNextFixedSize(bpv);
|
|
||||||
} else {
|
|
||||||
return bpv;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public long get(int index) {
|
public long get(int index) {
|
||||||
return current.get(index);
|
return current.get(index);
|
||||||
}
|
}
|
||||||
|
@ -78,7 +70,7 @@ public class GrowableWriter implements PackedInts.Mutable {
|
||||||
currentMaxValue *= 2;
|
currentMaxValue *= 2;
|
||||||
}
|
}
|
||||||
final int valueCount = size();
|
final int valueCount = size();
|
||||||
PackedInts.Mutable next = PackedInts.getMutable(valueCount, getSize(bpv));
|
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bpv, acceptableOverheadRatio);
|
||||||
for(int i=0;i<valueCount;i++) {
|
for(int i=0;i<valueCount;i++) {
|
||||||
next.set(i, current.get(i));
|
next.set(i, current.get(i));
|
||||||
}
|
}
|
||||||
|
@ -93,11 +85,12 @@ public class GrowableWriter implements PackedInts.Mutable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public GrowableWriter resize(int newSize) {
|
public GrowableWriter resize(int newSize) {
|
||||||
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, roundFixedSize);
|
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
|
||||||
final int limit = Math.min(size(), newSize);
|
final int limit = Math.min(size(), newSize);
|
||||||
for(int i=0;i<limit;i++) {
|
for(int i=0;i<limit;i++) {
|
||||||
next.set(i, get(i));
|
next.set(i, get(i));
|
||||||
}
|
}
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** 48 bitsPerValue backed by short[] */
|
||||||
|
final class Packed16ThreeBlocks extends PackedInts.ReaderImpl
|
||||||
|
implements PackedInts.Mutable {
|
||||||
|
|
||||||
|
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||||
|
|
||||||
|
private final short[] blocks;
|
||||||
|
|
||||||
|
Packed16ThreeBlocks(int valueCount) {
|
||||||
|
super(valueCount, 48);
|
||||||
|
if (valueCount > MAX_SIZE) {
|
||||||
|
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||||
|
}
|
||||||
|
this.blocks = new short[3 * valueCount];
|
||||||
|
}
|
||||||
|
|
||||||
|
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||||
|
this(valueCount);
|
||||||
|
for (int i = 0; i < blocks.length; i++) {
|
||||||
|
blocks[i] = in.readShort();
|
||||||
|
}
|
||||||
|
final int mod = blocks.length % 4;
|
||||||
|
if (mod != 0) {
|
||||||
|
final int pad = 4 - mod;
|
||||||
|
// round out long
|
||||||
|
for (int i = 0; i < pad; i++) {
|
||||||
|
in.readShort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long get(int index) {
|
||||||
|
final int o = index * 3;
|
||||||
|
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void set(int index, long value) {
|
||||||
|
final int o = index * 3;
|
||||||
|
blocks[o] = (short) (value >> 32);
|
||||||
|
blocks[o+1] = (short) (value >> 16);
|
||||||
|
blocks[o+2] = (short) value;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
Arrays.fill(blocks, (short) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return RamUsageEstimator.sizeOf(blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||||
|
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,227 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataInput;
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Space optimized random access capable array of values with a fixed number of
|
|
||||||
* bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher
|
|
||||||
* numbers.
|
|
||||||
* </p><p>
|
|
||||||
* The implementation strives to avoid conditionals and expensive operations,
|
|
||||||
* sacrificing code clarity to achieve better performance.
|
|
||||||
*/
|
|
||||||
|
|
||||||
class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
|
|
||||||
static final int BLOCK_SIZE = 32; // 32 = int, 64 = long
|
|
||||||
static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE
|
|
||||||
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
|
|
||||||
|
|
||||||
private static final int ENTRY_SIZE = BLOCK_SIZE + 1;
|
|
||||||
private static final int FAC_BITPOS = 3;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In order to make an efficient value-getter, conditionals should be
|
|
||||||
* avoided. A value can be positioned inside of a block, requiring shifting
|
|
||||||
* left or right or it can span two blocks, requiring a left-shift on the
|
|
||||||
* first block and a right-shift on the right block.
|
|
||||||
* </p><p>
|
|
||||||
* By always shifting the first block both left and right, we get exactly
|
|
||||||
* the right bits. By always shifting the second block right and applying
|
|
||||||
* a mask, we get the right bits there. After that, we | the two bitsets.
|
|
||||||
*/
|
|
||||||
private static final int[][] SHIFTS =
|
|
||||||
new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
|
|
||||||
private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE];
|
|
||||||
|
|
||||||
static { // Generate shifts
|
|
||||||
for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
|
|
||||||
for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
|
|
||||||
int[] currentShifts = SHIFTS[elementBits];
|
|
||||||
int base = bitPos * FAC_BITPOS;
|
|
||||||
currentShifts[base ] = bitPos;
|
|
||||||
currentShifts[base + 1] = BLOCK_SIZE - elementBits;
|
|
||||||
if (bitPos <= BLOCK_SIZE - elementBits) { // Single block
|
|
||||||
currentShifts[base + 2] = 0;
|
|
||||||
MASKS[elementBits][bitPos] = 0;
|
|
||||||
} else { // Two blocks
|
|
||||||
int rBits = elementBits - (BLOCK_SIZE - bitPos);
|
|
||||||
currentShifts[base + 2] = BLOCK_SIZE - rBits;
|
|
||||||
MASKS[elementBits][bitPos] = ~(~0 << rBits);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The setter requires more masking than the getter.
|
|
||||||
*/
|
|
||||||
private static final int[][] WRITE_MASKS =
|
|
||||||
new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
|
|
||||||
static {
|
|
||||||
for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
|
|
||||||
int elementPosMask = ~(~0 << elementBits);
|
|
||||||
int[] currentShifts = SHIFTS[elementBits];
|
|
||||||
int[] currentMasks = WRITE_MASKS[elementBits];
|
|
||||||
for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
|
|
||||||
int base = bitPos * FAC_BITPOS;
|
|
||||||
currentMasks[base ] =~((elementPosMask
|
|
||||||
<< currentShifts[base + 1])
|
|
||||||
>>> currentShifts[base]);
|
|
||||||
if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used
|
|
||||||
currentMasks[base+1] = ~0; // Keep all bits
|
|
||||||
currentMasks[base+2] = 0; // Or with 0
|
|
||||||
} else {
|
|
||||||
currentMasks[base+1] = ~(elementPosMask
|
|
||||||
<< currentShifts[base + 2]);
|
|
||||||
currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The bits */
|
|
||||||
private int[] blocks;
|
|
||||||
|
|
||||||
// Cached calculations
|
|
||||||
private int maxPos; // blocks.length * BLOCK_SIZE / bitsPerValue - 1
|
|
||||||
private int[] shifts; // The shifts for the current bitsPerValue
|
|
||||||
private int[] readMasks;
|
|
||||||
private int[] writeMasks;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array with the internal structures adjusted for the given
|
|
||||||
* limits and initialized to 0.
|
|
||||||
* @param valueCount the number of elements.
|
|
||||||
* @param bitsPerValue the number of bits available for any given value.
|
|
||||||
* Note: bitsPerValue >32 is not supported by this implementation.
|
|
||||||
*/
|
|
||||||
public Packed32(int valueCount, int bitsPerValue) {
|
|
||||||
this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)],
|
|
||||||
valueCount, bitsPerValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array with content retrieved from the given DataInput.
|
|
||||||
* @param in a DataInput, positioned at the start of Packed64-content.
|
|
||||||
* @param valueCount the number of elements.
|
|
||||||
* @param bitsPerValue the number of bits available for any given value.
|
|
||||||
* @throws java.io.IOException if the values for the backing array could not
|
|
||||||
* be retrieved.
|
|
||||||
*/
|
|
||||||
public Packed32(DataInput in, int valueCount, int bitsPerValue)
|
|
||||||
throws IOException {
|
|
||||||
super(valueCount, bitsPerValue);
|
|
||||||
int size = size(bitsPerValue, valueCount);
|
|
||||||
blocks = new int[size + 1]; // +1 due to non-conditional tricks
|
|
||||||
// TODO: find a faster way to bulk-read ints...
|
|
||||||
for(int i = 0 ; i < size ; i++) {
|
|
||||||
blocks[i] = in.readInt();
|
|
||||||
}
|
|
||||||
if (size % 2 == 1) {
|
|
||||||
in.readInt(); // Align to long
|
|
||||||
}
|
|
||||||
updateCached();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int size(int bitsPerValue, int valueCount) {
|
|
||||||
final long totBitCount = (long) valueCount * bitsPerValue;
|
|
||||||
return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array backed by the given blocks.
|
|
||||||
* </p><p>
|
|
||||||
* Note: The blocks are used directly, so changes to the given block will
|
|
||||||
* affect the Packed32-structure.
|
|
||||||
* @param blocks used as the internal backing array.
|
|
||||||
* @param valueCount the number of values.
|
|
||||||
* @param bitsPerValue the number of bits available for any given value.
|
|
||||||
* Note: bitsPerValue >32 is not supported by this implementation.
|
|
||||||
*/
|
|
||||||
public Packed32(int[] blocks, int valueCount, int bitsPerValue) {
|
|
||||||
// TODO: Check that blocks.length is sufficient for holding length values
|
|
||||||
super(valueCount, bitsPerValue);
|
|
||||||
if (bitsPerValue > 31) {
|
|
||||||
throw new IllegalArgumentException(String.format(
|
|
||||||
"This array only supports values of 31 bits or less. The "
|
|
||||||
+ "required number of bits was %d. The Packed64 "
|
|
||||||
+ "implementation allows values with more than 31 bits",
|
|
||||||
bitsPerValue));
|
|
||||||
}
|
|
||||||
this.blocks = blocks;
|
|
||||||
updateCached();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateCached() {
|
|
||||||
readMasks = MASKS[bitsPerValue];
|
|
||||||
maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2);
|
|
||||||
shifts = SHIFTS[bitsPerValue];
|
|
||||||
writeMasks = WRITE_MASKS[bitsPerValue];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param index the position of the value.
|
|
||||||
* @return the value at the given index.
|
|
||||||
*/
|
|
||||||
public long get(final int index) {
|
|
||||||
assert index >= 0 && index < size();
|
|
||||||
final long majorBitPos = (long)index * bitsPerValue;
|
|
||||||
final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
|
|
||||||
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
|
|
||||||
|
|
||||||
final int base = bitPos * FAC_BITPOS;
|
|
||||||
|
|
||||||
return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
|
|
||||||
((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void set(final int index, final long value) {
|
|
||||||
final int intValue = (int)value;
|
|
||||||
final long majorBitPos = (long)index * bitsPerValue;
|
|
||||||
final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
|
|
||||||
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
|
|
||||||
final int base = bitPos * FAC_BITPOS;
|
|
||||||
|
|
||||||
blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base])
|
|
||||||
| (intValue << shifts[base + 1] >>> shifts[base]);
|
|
||||||
blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1])
|
|
||||||
| ((intValue << shifts[base + 2])
|
|
||||||
& writeMasks[base+2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
Arrays.fill(blocks, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos
|
|
||||||
+ ", elements.length=" + blocks.length + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
public long ramBytesUsed() {
|
|
||||||
return RamUsageEstimator.sizeOf(blocks);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,365 @@
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is similar to {@link Packed64} except that it trades space for
|
||||||
|
* speed by ensuring that a single block needs to be read/written in order to
|
||||||
|
* read/write a value.
|
||||||
|
*/
|
||||||
|
abstract class Packed64SingleBlock extends PackedInts.ReaderImpl
|
||||||
|
implements PackedInts.Mutable {
|
||||||
|
|
||||||
|
private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {1, 2, 3, 4,
|
||||||
|
5, 6, 7, 9, 10, 12, 21};
|
||||||
|
private static final long[][] WRITE_MASKS = new long[22][];
|
||||||
|
private static final int[][] SHIFTS = new int[22][];
|
||||||
|
static {
|
||||||
|
for (int bpv : SUPPORTED_BITS_PER_VALUE) {
|
||||||
|
initMasks(bpv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void initMasks(int bpv) {
|
||||||
|
int valuesPerBlock = Long.SIZE / bpv;
|
||||||
|
long[] writeMasks = new long[valuesPerBlock];
|
||||||
|
int[] shifts = new int[valuesPerBlock];
|
||||||
|
long bits = (1L << bpv) - 1;
|
||||||
|
for (int i = 0; i < valuesPerBlock; ++i) {
|
||||||
|
shifts[i] = bpv * i;
|
||||||
|
writeMasks[i] = ~(bits << shifts[i]);
|
||||||
|
}
|
||||||
|
WRITE_MASKS[bpv] = writeMasks;
|
||||||
|
SHIFTS[bpv] = shifts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {
|
||||||
|
switch (bitsPerValue) {
|
||||||
|
case 1:
|
||||||
|
return new Packed64SingleBlock1(valueCount);
|
||||||
|
case 2:
|
||||||
|
return new Packed64SingleBlock2(valueCount);
|
||||||
|
case 3:
|
||||||
|
return new Packed64SingleBlock3(valueCount);
|
||||||
|
case 4:
|
||||||
|
return new Packed64SingleBlock4(valueCount);
|
||||||
|
case 5:
|
||||||
|
return new Packed64SingleBlock5(valueCount);
|
||||||
|
case 6:
|
||||||
|
return new Packed64SingleBlock6(valueCount);
|
||||||
|
case 7:
|
||||||
|
return new Packed64SingleBlock7(valueCount);
|
||||||
|
case 9:
|
||||||
|
return new Packed64SingleBlock9(valueCount);
|
||||||
|
case 10:
|
||||||
|
return new Packed64SingleBlock10(valueCount);
|
||||||
|
case 12:
|
||||||
|
return new Packed64SingleBlock12(valueCount);
|
||||||
|
case 21:
|
||||||
|
return new Packed64SingleBlock21(valueCount);
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("Unsupported bitsPerValue: "
|
||||||
|
+ bitsPerValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Packed64SingleBlock create(DataInput in,
|
||||||
|
int valueCount, int bitsPerValue) throws IOException {
|
||||||
|
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
|
||||||
|
for (int i = 0; i < reader.blocks.length; ++i) {
|
||||||
|
reader.blocks[i] = in.readLong();
|
||||||
|
}
|
||||||
|
return reader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isSupported(int bitsPerValue) {
|
||||||
|
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static float overheadPerValue(int bitsPerValue) {
|
||||||
|
int valuesPerBlock = 64 / bitsPerValue;
|
||||||
|
int overhead = 64 % bitsPerValue;
|
||||||
|
return (float) overhead / valuesPerBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final long[] blocks;
|
||||||
|
protected final int valuesPerBlock;
|
||||||
|
protected final int[] shifts;
|
||||||
|
protected final long[] writeMasks;
|
||||||
|
protected final long readMask;
|
||||||
|
|
||||||
|
/**
 * Allocates enough zeroed blocks for {@code valueCount} values and picks up
 * the shift and mask tables stored under {@code bitsPerValue}.
 */
Packed64SingleBlock(int valueCount, int bitsPerValue) {
  super(valueCount, bitsPerValue);
  this.valuesPerBlock = Long.SIZE / bitsPerValue;
  this.blocks = new long[requiredCapacity(valueCount, this.valuesPerBlock)];
  this.shifts = SHIFTS[bitsPerValue];
  this.writeMasks = WRITE_MASKS[bitsPerValue];
  // The write mask of slot 0 clears the low bits, so its complement selects them.
  this.readMask = ~this.writeMasks[0];
}
|
||||||
|
|
||||||
|
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
||||||
|
return valueCount / valuesPerBlock
|
||||||
|
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / valuesPerBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % valuesPerBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long get(int index) {
|
||||||
|
final int o = blockOffset(index);
|
||||||
|
final int b = offsetInBlock(index);
|
||||||
|
|
||||||
|
return (blocks[o] >> shifts[b]) & readMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void set(int index, long value) {
|
||||||
|
final int o = blockOffset(index);
|
||||||
|
final int b = offsetInBlock(index);
|
||||||
|
|
||||||
|
blocks[o] = (blocks[o] & writeMasks[b]) | (value << shifts[b]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
Arrays.fill(blocks, 0L);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return RamUsageEstimator.sizeOf(blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||||
|
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Specialisations that allow the JVM to optimize computation of the block
|
||||||
|
// offset as well as the offset in block
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock21 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock21(int valueCount) {
|
||||||
|
super(valueCount, 21);
|
||||||
|
assert valuesPerBlock == 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock12 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock12(int valueCount) {
|
||||||
|
super(valueCount, 12);
|
||||||
|
assert valuesPerBlock == 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock10 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock10(int valueCount) {
|
||||||
|
super(valueCount, 10);
|
||||||
|
assert valuesPerBlock == 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 6;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock9 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock9(int valueCount) {
|
||||||
|
super(valueCount, 9);
|
||||||
|
assert valuesPerBlock == 7;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 7;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 7;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock7 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock7(int valueCount) {
|
||||||
|
super(valueCount, 7);
|
||||||
|
assert valuesPerBlock == 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 9;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock6 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock6(int valueCount) {
|
||||||
|
super(valueCount, 6);
|
||||||
|
assert valuesPerBlock == 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock5 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock5(int valueCount) {
|
||||||
|
super(valueCount, 5);
|
||||||
|
assert valuesPerBlock == 12;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 12;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 12;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock4 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock4(int valueCount) {
|
||||||
|
super(valueCount, 4);
|
||||||
|
assert valuesPerBlock == 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset >> 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset & 15;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock3 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock3(int valueCount) {
|
||||||
|
super(valueCount, 3);
|
||||||
|
assert valuesPerBlock == 21;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset / 21;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset % 21;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock2 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock2(int valueCount) {
|
||||||
|
super(valueCount, 2);
|
||||||
|
assert valuesPerBlock == 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset >> 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset & 31;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Packed64SingleBlock1 extends Packed64SingleBlock {
|
||||||
|
|
||||||
|
Packed64SingleBlock1(int valueCount) {
|
||||||
|
super(valueCount, 1);
|
||||||
|
assert valuesPerBlock == 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int blockOffset(int offset) {
|
||||||
|
return offset >> 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int offsetInBlock(int offset) {
|
||||||
|
return offset & 63;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {
|
||||||
|
|
||||||
|
private long pending;
|
||||||
|
private int shift;
|
||||||
|
private final long mask;
|
||||||
|
private int position;
|
||||||
|
|
||||||
|
Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
|
||||||
|
throws IOException {
|
||||||
|
super(valueCount, bitsPerValue, in);
|
||||||
|
pending = 0;
|
||||||
|
shift = 64;
|
||||||
|
mask = ~(~0L << bitsPerValue);
|
||||||
|
position = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long next() throws IOException {
|
||||||
|
if (shift + bitsPerValue > 64) {
|
||||||
|
pending = in.readLong();
|
||||||
|
shift = 0;
|
||||||
|
}
|
||||||
|
final long next = (pending >>> shift) & mask;
|
||||||
|
shift += bitsPerValue;
|
||||||
|
++position;
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int ord() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long advance(int ord) throws IOException {
|
||||||
|
assert ord < valueCount : "ord must be less than valueCount";
|
||||||
|
assert ord > position : "ord must be greater than the current position";
|
||||||
|
|
||||||
|
final int valuesPerBlock = 64 / bitsPerValue;
|
||||||
|
final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
|
||||||
|
final long targetBlock = ord / valuesPerBlock;
|
||||||
|
final long blocksToSkip = targetBlock - nextBlock;
|
||||||
|
if (blocksToSkip > 0) {
|
||||||
|
final long skip = blocksToSkip << 3;
|
||||||
|
final long filePointer = in.getFilePointer();
|
||||||
|
|
||||||
|
in.seek(filePointer + skip);
|
||||||
|
shift = 64;
|
||||||
|
|
||||||
|
final int offsetInBlock = ord % valuesPerBlock;
|
||||||
|
for (int i = 0; i < offsetInBlock; ++i) {
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = position; i < ord - 1; ++i) {
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
position = ord - 1;
|
||||||
|
return next();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts.Writer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link Writer} for {@link Packed64SingleBlock} readers.
|
||||||
|
*/
|
||||||
|
final class Packed64SingleBlockWriter extends Writer {
|
||||||
|
|
||||||
|
private long pending;
|
||||||
|
private int shift;
|
||||||
|
private int written;
|
||||||
|
|
||||||
|
Packed64SingleBlockWriter(DataOutput out, int valueCount,
|
||||||
|
int bitsPerValue) throws IOException {
|
||||||
|
super(out, valueCount, bitsPerValue);
|
||||||
|
assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
|
||||||
|
pending = 0;
|
||||||
|
shift = 0;
|
||||||
|
written = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int getFormat() {
|
||||||
|
return PackedInts.PACKED_SINGLE_BLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(long v) throws IOException {
|
||||||
|
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
|
||||||
|
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
|
||||||
|
assert v >= 0;
|
||||||
|
|
||||||
|
if (shift + bitsPerValue > Long.SIZE) {
|
||||||
|
out.writeLong(pending);
|
||||||
|
pending = 0;
|
||||||
|
shift = 0;
|
||||||
|
}
|
||||||
|
pending |= v << shift;
|
||||||
|
shift += bitsPerValue;
|
||||||
|
++written;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finish() throws IOException {
|
||||||
|
while (written < valueCount) {
|
||||||
|
add(0L); // Auto flush
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shift > 0) {
|
||||||
|
// add was called at least once
|
||||||
|
out.writeLong(pending);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "Packed64SingleBlockWriter(written " + written + "/" + valueCount + " with "
|
||||||
|
+ bitsPerValue + " bits/value)";
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** 24 bitsPerValue backed by byte[] */
|
||||||
|
final class Packed8ThreeBlocks extends PackedInts.ReaderImpl
|
||||||
|
implements PackedInts.Mutable {
|
||||||
|
|
||||||
|
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||||
|
|
||||||
|
private final byte[] blocks;
|
||||||
|
|
||||||
|
Packed8ThreeBlocks(int valueCount) {
|
||||||
|
super(valueCount, 24);
|
||||||
|
if (valueCount > MAX_SIZE) {
|
||||||
|
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||||
|
}
|
||||||
|
this.blocks = new byte[3 * valueCount];
|
||||||
|
}
|
||||||
|
|
||||||
|
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||||
|
this(valueCount);
|
||||||
|
for (int i = 0; i < blocks.length; i++) {
|
||||||
|
blocks[i] = in.readByte();
|
||||||
|
}
|
||||||
|
final int mod = blocks.length % 8;
|
||||||
|
if (mod != 0) {
|
||||||
|
final int pad = 8 - mod;
|
||||||
|
// round out long
|
||||||
|
for (int i = 0; i < pad; i++) {
|
||||||
|
in.readByte();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long get(int index) {
|
||||||
|
final int o = index * 3;
|
||||||
|
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void set(int index, long value) {
|
||||||
|
final int o = index * 3;
|
||||||
|
blocks[o+2] = (byte) value;
|
||||||
|
blocks[o+1] = (byte) (value >> 8);
|
||||||
|
blocks[o] = (byte) (value >> 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
Arrays.fill(blocks, (byte) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return RamUsageEstimator.sizeOf(blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||||
|
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
import org.apache.lucene.util.Constants;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@ -38,10 +37,33 @@ import java.io.IOException;
|
||||||
|
|
||||||
public class PackedInts {
|
public class PackedInts {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* At most 700% memory overhead, always select a direct implementation.
|
||||||
|
*/
|
||||||
|
public static final float FASTEST = 7f;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* At most 50% memory overhead, always select a reasonably fast implementation.
|
||||||
|
*/
|
||||||
|
public static final float FAST = 0.5f;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* At most 20% memory overhead.
|
||||||
|
*/
|
||||||
|
public static final float DEFAULT = 0.2f;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* No memory overhead at all, but the returned implementation may be slow.
|
||||||
|
*/
|
||||||
|
public static final float COMPACT = 0f;
|
||||||
|
|
||||||
private final static String CODEC_NAME = "PackedInts";
|
private final static String CODEC_NAME = "PackedInts";
|
||||||
private final static int VERSION_START = 0;
|
private final static int VERSION_START = 0;
|
||||||
private final static int VERSION_CURRENT = VERSION_START;
|
private final static int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
|
static final int PACKED = 0;
|
||||||
|
static final int PACKED_SINGLE_BLOCK = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A read-only random access array of positive integers.
|
* A read-only random access array of positive integers.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
|
@ -104,6 +126,34 @@ public class PackedInts {
|
||||||
long advance(int ord) throws IOException;
|
long advance(int ord) throws IOException;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static abstract class ReaderIteratorImpl implements ReaderIterator {
|
||||||
|
|
||||||
|
protected final IndexInput in;
|
||||||
|
protected final int bitsPerValue;
|
||||||
|
protected final int valueCount;
|
||||||
|
|
||||||
|
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
|
||||||
|
this.in = in;
|
||||||
|
this.bitsPerValue = bitsPerValue;
|
||||||
|
this.valueCount = valueCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getBitsPerValue() {
|
||||||
|
return bitsPerValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return valueCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A packed integer array that can be modified.
|
* A packed integer array that can be modified.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
|
@ -119,7 +169,6 @@ public class PackedInts {
|
||||||
/**
|
/**
|
||||||
* Sets all values to 0.
|
* Sets all values to 0.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void clear();
|
void clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -145,10 +194,6 @@ public class PackedInts {
|
||||||
return valueCount;
|
return valueCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getMaxValue() { // Convenience method
|
|
||||||
return maxValue(bitsPerValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Object getArray() {
|
public Object getArray() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -176,8 +221,10 @@ public class PackedInts {
|
||||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||||
out.writeVInt(bitsPerValue);
|
out.writeVInt(bitsPerValue);
|
||||||
out.writeVInt(valueCount);
|
out.writeVInt(valueCount);
|
||||||
|
out.writeVInt(getFormat());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected abstract int getFormat();
|
||||||
public abstract void add(long v) throws IOException;
|
public abstract void add(long v) throws IOException;
|
||||||
public abstract void finish() throws IOException;
|
public abstract void finish() throws IOException;
|
||||||
}
|
}
|
||||||
|
@ -185,6 +232,7 @@ public class PackedInts {
|
||||||
/**
|
/**
|
||||||
* Retrieve PackedInt data from the DataInput and return a packed int
|
* Retrieve PackedInt data from the DataInput and return a packed int
|
||||||
* structure based on it.
|
* structure based on it.
|
||||||
|
*
|
||||||
* @param in positioned at the beginning of a stored packed int structure.
|
* @param in positioned at the beginning of a stored packed int structure.
|
||||||
* @return a read only random access capable array of positive integers.
|
* @return a read only random access capable array of positive integers.
|
||||||
* @throws IOException if the structure could not be retrieved.
|
* @throws IOException if the structure could not be retrieved.
|
||||||
|
@ -195,22 +243,30 @@ public class PackedInts {
|
||||||
final int bitsPerValue = in.readVInt();
|
final int bitsPerValue = in.readVInt();
|
||||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||||
final int valueCount = in.readVInt();
|
final int valueCount = in.readVInt();
|
||||||
|
final int format = in.readVInt();
|
||||||
|
|
||||||
|
switch (format) {
|
||||||
|
case PACKED:
|
||||||
switch (bitsPerValue) {
|
switch (bitsPerValue) {
|
||||||
case 8:
|
case 8:
|
||||||
return new Direct8(in, valueCount);
|
return new Direct8(in, valueCount);
|
||||||
case 16:
|
case 16:
|
||||||
return new Direct16(in, valueCount);
|
return new Direct16(in, valueCount);
|
||||||
|
case 24:
|
||||||
|
return new Packed8ThreeBlocks(in, valueCount);
|
||||||
case 32:
|
case 32:
|
||||||
return new Direct32(in, valueCount);
|
return new Direct32(in, valueCount);
|
||||||
|
case 48:
|
||||||
|
return new Packed16ThreeBlocks(in, valueCount);
|
||||||
case 64:
|
case 64:
|
||||||
return new Direct64(in, valueCount);
|
return new Direct64(in, valueCount);
|
||||||
default:
|
default:
|
||||||
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
|
|
||||||
return new Packed64(in, valueCount, bitsPerValue);
|
return new Packed64(in, valueCount, bitsPerValue);
|
||||||
} else {
|
|
||||||
return new Packed32(in, valueCount, bitsPerValue);
|
|
||||||
}
|
}
|
||||||
|
case PACKED_SINGLE_BLOCK:
|
||||||
|
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
|
||||||
|
default:
|
||||||
|
throw new AssertionError("Unknwown Writer format: " + format);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,7 +282,15 @@ public class PackedInts {
|
||||||
final int bitsPerValue = in.readVInt();
|
final int bitsPerValue = in.readVInt();
|
||||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||||
final int valueCount = in.readVInt();
|
final int valueCount = in.readVInt();
|
||||||
return new PackedReaderIterator(bitsPerValue, valueCount, in);
|
final int format = in.readVInt();
|
||||||
|
switch (format) {
|
||||||
|
case PACKED:
|
||||||
|
return new PackedReaderIterator(valueCount, bitsPerValue, in);
|
||||||
|
case PACKED_SINGLE_BLOCK:
|
||||||
|
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
|
||||||
|
default:
|
||||||
|
throw new AssertionError("Unknwown Writer format: " + format);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -243,55 +307,126 @@ public class PackedInts {
|
||||||
final int bitsPerValue = in.readVInt();
|
final int bitsPerValue = in.readVInt();
|
||||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||||
final int valueCount = in.readVInt();
|
final int valueCount = in.readVInt();
|
||||||
return new DirectReader(bitsPerValue, valueCount, in);
|
final int format = in.readVInt();
|
||||||
|
switch (format) {
|
||||||
|
case PACKED:
|
||||||
|
return new DirectPackedReader(bitsPerValue, valueCount, in);
|
||||||
|
case PACKED_SINGLE_BLOCK:
|
||||||
|
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
|
||||||
|
default:
|
||||||
|
throw new AssertionError("Unknwown Writer format: " + format);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a packed integer array with the given amount of values initialized
|
* Create a packed integer array with the given amount of values initialized
|
||||||
* to 0. the valueCount and the bitsPerValue cannot be changed after creation.
|
* to 0. the valueCount and the bitsPerValue cannot be changed after creation.
|
||||||
* All Mutables known by this factory are kept fully in RAM.
|
* All Mutables known by this factory are kept fully in RAM.
|
||||||
* @param valueCount the number of elements.
|
*
|
||||||
* @param bitsPerValue the number of bits available for any given value.
|
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||||
* @return a mutable packed integer array.
|
* for speed by selecting a faster but potentially less memory-efficient
|
||||||
|
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||||
|
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
||||||
|
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
||||||
|
* that the fastest implementation is selected.
|
||||||
|
*
|
||||||
|
* @param valueCount the number of elements
|
||||||
|
* @param bitsPerValue the number of bits available for any given value
|
||||||
|
* @param acceptableOverheadRatio an acceptable overhead
|
||||||
|
* ratio per value
|
||||||
|
* @return a mutable packed integer array
|
||||||
* @throws java.io.IOException if the Mutable could not be created. With the
|
* @throws java.io.IOException if the Mutable could not be created. With the
|
||||||
* current implementations, this never happens, but the method
|
* current implementations, this never happens, but the method
|
||||||
* signature allows for future persistence-backed Mutables.
|
* signature allows for future persistence-backed Mutables.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static Mutable getMutable(
|
public static Mutable getMutable(int valueCount,
|
||||||
int valueCount, int bitsPerValue) {
|
int bitsPerValue, float acceptableOverheadRatio) {
|
||||||
switch (bitsPerValue) {
|
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||||
case 8:
|
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||||
|
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||||
|
|
||||||
|
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||||
|
|
||||||
|
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||||
return new Direct8(valueCount);
|
return new Direct8(valueCount);
|
||||||
case 16:
|
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||||
return new Direct16(valueCount);
|
return new Direct16(valueCount);
|
||||||
case 32:
|
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||||
return new Direct32(valueCount);
|
return new Direct32(valueCount);
|
||||||
case 64:
|
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||||
return new Direct64(valueCount);
|
return new Direct64(valueCount);
|
||||||
default:
|
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||||
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
|
return new Packed8ThreeBlocks(valueCount);
|
||||||
return new Packed64(valueCount, bitsPerValue);
|
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||||
|
return new Packed16ThreeBlocks(valueCount);
|
||||||
} else {
|
} else {
|
||||||
return new Packed32(valueCount, bitsPerValue);
|
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||||
|
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||||
|
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||||
|
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||||
|
if (overhead <= acceptableOverhead) {
|
||||||
|
return Packed64SingleBlock.create(valueCount, bpv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return new Packed64(valueCount, bitsPerValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a packed integer array writer for the given number of values at the
|
* Create a packed integer array writer for the given number of values at the
|
||||||
* given bits/value. Writers append to the given IndexOutput and has very
|
* given bits/value. Writers append to the given IndexOutput and has very
|
||||||
* low memory overhead.
|
* low memory overhead.
|
||||||
|
*
|
||||||
|
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||||
|
* for speed by selecting a faster but potentially less memory-efficient
|
||||||
|
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||||
|
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
||||||
|
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
||||||
|
* that the fastest implementation is selected.
|
||||||
|
*
|
||||||
* @param out the destination for the produced bits.
|
* @param out the destination for the produced bits.
|
||||||
* @param valueCount the number of elements.
|
* @param valueCount the number of elements.
|
||||||
* @param bitsPerValue the number of bits available for any given value.
|
* @param bitsPerValue the number of bits available for any given value.
|
||||||
|
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||||
* @return a Writer ready for receiving values.
|
* @return a Writer ready for receiving values.
|
||||||
* @throws IOException if bits could not be written to out.
|
* @throws IOException if bits could not be written to out.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue)
|
public static Writer getWriter(DataOutput out,
|
||||||
|
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||||
|
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||||
|
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||||
|
|
||||||
|
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||||
|
|
||||||
|
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||||
|
return new PackedWriter(out, valueCount, 8);
|
||||||
|
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||||
|
return new PackedWriter(out, valueCount, 16);
|
||||||
|
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||||
|
return new PackedWriter(out, valueCount, 32);
|
||||||
|
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||||
|
return new PackedWriter(out, valueCount, 64);
|
||||||
|
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||||
|
return new PackedWriter(out, valueCount, 24);
|
||||||
|
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||||
|
} else {
|
||||||
|
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||||
|
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||||
|
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||||
|
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||||
|
if (overhead <= acceptableOverhead) {
|
||||||
|
return new Packed64SingleBlockWriter(out, valueCount, bpv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns how many bits are required to hold values up
|
/** Returns how many bits are required to hold values up
|
||||||
|
@ -301,14 +436,10 @@ public class PackedInts {
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static int bitsRequired(long maxValue) {
|
public static int bitsRequired(long maxValue) {
|
||||||
// Very high long values does not translate well to double, so we do an
|
if (maxValue < 0) {
|
||||||
// explicit check for the edge cases
|
throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
|
||||||
if (maxValue > 0x3FFFFFFFFFFFFFFFL) {
|
|
||||||
return 63;
|
|
||||||
} if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
|
|
||||||
return 62;
|
|
||||||
}
|
}
|
||||||
return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
|
return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -321,26 +452,4 @@ public class PackedInts {
|
||||||
public static long maxValue(int bitsPerValue) {
|
public static long maxValue(int bitsPerValue) {
|
||||||
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
|
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Rounds bitsPerValue up to 8, 16, 32 or 64. */
|
|
||||||
public static int getNextFixedSize(int bitsPerValue) {
|
|
||||||
if (bitsPerValue <= 8) {
|
|
||||||
return 8;
|
|
||||||
} else if (bitsPerValue <= 16) {
|
|
||||||
return 16;
|
|
||||||
} else if (bitsPerValue <= 32) {
|
|
||||||
return 32;
|
|
||||||
} else {
|
|
||||||
return 64;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Possibly wastes some storage in exchange for faster lookups */
|
|
||||||
public static int getRoundedFixedSize(int bitsPerValue) {
|
|
||||||
if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
|
|
||||||
return getNextFixedSize(bitsPerValue);
|
|
||||||
} else {
|
|
||||||
return bitsPerValue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,24 +21,18 @@ import org.apache.lucene.store.IndexInput;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
final class PackedReaderIterator implements PackedInts.ReaderIterator {
|
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
|
||||||
private long pending;
|
private long pending;
|
||||||
private int pendingBitsLeft;
|
private int pendingBitsLeft;
|
||||||
private final IndexInput in;
|
|
||||||
private final int bitsPerValue;
|
|
||||||
private final int valueCount;
|
|
||||||
private int position = -1;
|
private int position = -1;
|
||||||
|
|
||||||
// masks[n-1] masks for bottom n bits
|
// masks[n-1] masks for bottom n bits
|
||||||
private final long[] masks;
|
private final long[] masks;
|
||||||
|
|
||||||
public PackedReaderIterator(int bitsPerValue, int valueCount, IndexInput in)
|
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
super(valueCount, bitsPerValue, in);
|
||||||
|
|
||||||
this.valueCount = valueCount;
|
|
||||||
this.bitsPerValue = bitsPerValue;
|
|
||||||
|
|
||||||
this.in = in;
|
|
||||||
masks = new long[bitsPerValue];
|
masks = new long[bitsPerValue];
|
||||||
|
|
||||||
long v = 1;
|
long v = 1;
|
||||||
|
@ -48,14 +42,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getBitsPerValue() {
|
|
||||||
return bitsPerValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return valueCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long next() throws IOException {
|
public long next() throws IOException {
|
||||||
if (pendingBitsLeft == 0) {
|
if (pendingBitsLeft == 0) {
|
||||||
pending = in.readLong();
|
pending = in.readLong();
|
||||||
|
@ -79,10 +65,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
in.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int ord() {
|
public int ord() {
|
||||||
return position;
|
return position;
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,6 +52,11 @@ class PackedWriter extends PackedInts.Writer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int getFormat() {
|
||||||
|
return PackedInts.PACKED;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Do not call this after finish
|
* Do not call this after finish
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
// TODO: some of this should be under lucene40 codec tests? is talking to codec directly?f
|
// TODO: some of this should be under lucene40 codec tests? is talking to codec directly?f
|
||||||
public class TestDocValues extends LuceneTestCase {
|
public class TestDocValues extends LuceneTestCase {
|
||||||
|
@ -71,7 +72,7 @@ public class TestDocValues extends LuceneTestCase {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
final Counter trackBytes = Counter.newCounter();
|
final Counter trackBytes = Counter.newCounter();
|
||||||
DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random()),
|
DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random()),
|
||||||
random().nextBoolean());
|
random().nextFloat() * PackedInts.FAST);
|
||||||
int maxDoc = 220;
|
int maxDoc = 220;
|
||||||
final String[] values = new String[maxDoc];
|
final String[] values = new String[maxDoc];
|
||||||
final int fixedLength = 1 + atLeast(50);
|
final int fixedLength = 1 + atLeast(50);
|
||||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
public class TestIndexWriter extends LuceneTestCase {
|
public class TestIndexWriter extends LuceneTestCase {
|
||||||
|
|
||||||
|
@ -1677,7 +1678,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
|
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
|
||||||
|
|
||||||
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextBoolean());
|
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
|
||||||
assertEquals(5, dti.numOrd()); // +1 for null ord
|
assertEquals(5, dti.numOrd()); // +1 for null ord
|
||||||
assertEquals(4, dti.size());
|
assertEquals(4, dti.size());
|
||||||
assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef()));
|
assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef()));
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.store.*;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts.Reader;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -56,7 +57,7 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
|
|
||||||
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
|
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
|
||||||
PackedInts.Writer w = PackedInts.getWriter(
|
PackedInts.Writer w = PackedInts.getWriter(
|
||||||
out, valueCount, nbits);
|
out, valueCount, nbits, random().nextFloat()*PackedInts.FASTEST);
|
||||||
|
|
||||||
final long[] values = new long[valueCount];
|
final long[] values = new long[valueCount];
|
||||||
for(int i=0;i<valueCount;i++) {
|
for(int i=0;i<valueCount;i++) {
|
||||||
|
@ -188,16 +189,24 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
if (bitsPerValue <= 16) {
|
if (bitsPerValue <= 16) {
|
||||||
packedInts.add(new Direct16(valueCount));
|
packedInts.add(new Direct16(valueCount));
|
||||||
}
|
}
|
||||||
if (bitsPerValue <= 31) {
|
if (bitsPerValue <= 24 && valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
|
||||||
packedInts.add(new Packed32(valueCount, bitsPerValue));
|
packedInts.add(new Packed8ThreeBlocks(valueCount));
|
||||||
}
|
}
|
||||||
if (bitsPerValue <= 32) {
|
if (bitsPerValue <= 32) {
|
||||||
packedInts.add(new Direct32(valueCount));
|
packedInts.add(new Direct32(valueCount));
|
||||||
}
|
}
|
||||||
|
if (bitsPerValue <= 48 && valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
|
||||||
|
packedInts.add(new Packed16ThreeBlocks(valueCount));
|
||||||
|
}
|
||||||
if (bitsPerValue <= 63) {
|
if (bitsPerValue <= 63) {
|
||||||
packedInts.add(new Packed64(valueCount, bitsPerValue));
|
packedInts.add(new Packed64(valueCount, bitsPerValue));
|
||||||
}
|
}
|
||||||
packedInts.add(new Direct64(valueCount));
|
packedInts.add(new Direct64(valueCount));
|
||||||
|
for (int bpv = bitsPerValue; bpv <= 64; ++bpv) {
|
||||||
|
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||||
|
packedInts.add(Packed64SingleBlock.create(valueCount, bpv));
|
||||||
|
}
|
||||||
|
}
|
||||||
return packedInts;
|
return packedInts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -242,21 +251,27 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSingleValue() throws Exception {
|
public void testSingleValue() throws Exception {
|
||||||
|
for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
IndexOutput out = dir.createOutput("out", newIOContext(random()));
|
IndexOutput out = dir.createOutput("out", newIOContext(random()));
|
||||||
PackedInts.Writer w = PackedInts.getWriter(out, 1, 8);
|
PackedInts.Writer w = PackedInts.getWriter(out, 1, bitsPerValue, PackedInts.DEFAULT);
|
||||||
w.add(17);
|
long value = 17L & PackedInts.maxValue(bitsPerValue);
|
||||||
|
w.add(value);
|
||||||
w.finish();
|
w.finish();
|
||||||
final long end = out.getFilePointer();
|
final long end = out.getFilePointer();
|
||||||
out.close();
|
out.close();
|
||||||
|
|
||||||
IndexInput in = dir.openInput("out", newIOContext(random()));
|
IndexInput in = dir.openInput("out", newIOContext(random()));
|
||||||
PackedInts.getReader(in);
|
Reader reader = PackedInts.getReader(in);
|
||||||
assertEquals(end, in.getFilePointer());
|
String msg = "Impl=" + w.getClass().getSimpleName() + ", bitsPerValue=" + bitsPerValue;
|
||||||
|
assertEquals(msg, 1, reader.size());
|
||||||
|
assertEquals(msg, value, reader.get(0));
|
||||||
|
assertEquals(msg, end, in.getFilePointer());
|
||||||
in.close();
|
in.close();
|
||||||
|
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testSecondaryBlockChange() throws IOException {
|
public void testSecondaryBlockChange() throws IOException {
|
||||||
PackedInts.Mutable mutable = new Packed64(26, 5);
|
PackedInts.Mutable mutable = new Packed64(26, 5);
|
||||||
|
@ -276,15 +291,36 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
int INDEX = (int)Math.pow(2, 30)+1;
|
int INDEX = (int)Math.pow(2, 30)+1;
|
||||||
int BITS = 2;
|
int BITS = 2;
|
||||||
|
|
||||||
Packed32 p32 = new Packed32(INDEX, BITS);
|
|
||||||
p32.set(INDEX-1, 1);
|
|
||||||
assertEquals("The value at position " + (INDEX-1)
|
|
||||||
+ " should be correct for Packed32", 1, p32.get(INDEX-1));
|
|
||||||
p32 = null; // To free the 256MB used
|
|
||||||
|
|
||||||
Packed64 p64 = new Packed64(INDEX, BITS);
|
Packed64 p64 = new Packed64(INDEX, BITS);
|
||||||
p64.set(INDEX-1, 1);
|
p64.set(INDEX-1, 1);
|
||||||
assertEquals("The value at position " + (INDEX-1)
|
assertEquals("The value at position " + (INDEX-1)
|
||||||
+ " should be correct for Packed64", 1, p64.get(INDEX-1));
|
+ " should be correct for Packed64", 1, p64.get(INDEX-1));
|
||||||
|
p64 = null;
|
||||||
|
|
||||||
|
for (int bits = 1; bits <=64; ++bits) {
|
||||||
|
if (Packed64SingleBlock.isSupported(bits)) {
|
||||||
|
int index = Integer.MAX_VALUE / bits + (bits == 1 ? 0 : 1);
|
||||||
|
Packed64SingleBlock p64sb = Packed64SingleBlock.create(index, bits);
|
||||||
|
p64sb.set(index - 1, 1);
|
||||||
|
assertEquals("The value at position " + (index-1)
|
||||||
|
+ " should be correct for " + p64sb.getClass().getSimpleName(),
|
||||||
|
1, p64sb.get(index-1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int index = Integer.MAX_VALUE / 24 + 1;
|
||||||
|
Packed8ThreeBlocks p8 = new Packed8ThreeBlocks(index);
|
||||||
|
p8.set(index - 1, 1);
|
||||||
|
assertEquals("The value at position " + (index-1)
|
||||||
|
+ " should be correct for Packed8ThreeBlocks", 1, p8.get(index-1));
|
||||||
|
p8 = null;
|
||||||
|
|
||||||
|
index = Integer.MAX_VALUE / 48 + 1;
|
||||||
|
Packed16ThreeBlocks p16 = new Packed16ThreeBlocks(index);
|
||||||
|
p16.set(index - 1, 1);
|
||||||
|
assertEquals("The value at position " + (index-1)
|
||||||
|
+ " should be correct for Packed16ThreeBlocks", 1, p16.get(index-1));
|
||||||
|
p16 = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
||||||
import org.apache.lucene.search.FieldCache.DocTerms;
|
import org.apache.lucene.search.FieldCache.DocTerms;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.ReaderUtil;
|
import org.apache.lucene.util.ReaderUtil;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use a field value and find the Document Frequency within another field.
|
* Use a field value and find the Document Frequency within another field.
|
||||||
|
@ -52,7 +53,7 @@ public class JoinDocFreqValueSource extends FieldCacheSource {
|
||||||
@Override
|
@Override
|
||||||
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
|
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
|
||||||
{
|
{
|
||||||
final DocTerms terms = cache.getTerms(readerContext.reader(), field, true );
|
final DocTerms terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST);
|
||||||
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
|
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
|
||||||
|
|
||||||
return new IntDocValues(this) {
|
return new IntDocValues(this) {
|
||||||
|
|
Loading…
Reference in New Issue