mirror of https://github.com/apache/lucene.git
LUCENE-4062: add new aligned packed bits impls for faster performance
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1342751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f4819005cf
commit
6a4a717220
|
@ -921,6 +921,11 @@ Optimizations
|
|||
* LUCENE-4061: improve synchronization in DirectoryTaxonomyWriter.addCategory
|
||||
and few general improvements to DirectoryTaxonomyWriter.
|
||||
(Shai Erera, Gilad Barkai)
|
||||
|
||||
* LUCENE-4062: Add new aligned packed bits impls for faster lookup
|
||||
performance; add float acceptableOverheadRatio to getWriter and
|
||||
getMutable API to give packed ints freedom to pick faster
|
||||
implementations (Adrien Grand via Mike McCandless)
|
||||
|
||||
Bug fixes
|
||||
|
||||
|
|
|
@ -328,8 +328,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
|
|||
// we'd have to try @ fewer bits and then grow
|
||||
// if we overflowed it.
|
||||
|
||||
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue());
|
||||
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue());
|
||||
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
|
||||
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
|
||||
|
||||
termsDictOffsets = termsDictOffsetsM;
|
||||
termOffsets = termOffsetsM;
|
||||
|
|
|
@ -183,7 +183,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
// write primary terms dict offsets
|
||||
packedIndexStart = out.getFilePointer();
|
||||
|
||||
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer));
|
||||
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT);
|
||||
|
||||
// relative to our indexStart
|
||||
long upto = 0;
|
||||
|
@ -196,7 +196,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
packedOffsetsStart = out.getFilePointer();
|
||||
|
||||
// write offsets into the byte[] terms
|
||||
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength));
|
||||
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength), PackedInts.DEFAULT);
|
||||
upto = 0;
|
||||
for(int i=0;i<numIndexTerms;i++) {
|
||||
w.add(upto);
|
||||
|
|
|
@ -74,7 +74,7 @@ class TermInfosReaderIndex {
|
|||
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
|
||||
|
||||
final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
|
||||
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, false);
|
||||
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);
|
||||
|
||||
String currentField = null;
|
||||
List<String> fieldStrs = new ArrayList<String>();
|
||||
|
|
|
@ -115,17 +115,19 @@ public final class Bytes {
|
|||
* {@link Writer}. A call to {@link Writer#finish(int)} will release
|
||||
* all internally used resources and frees the memory tracking
|
||||
* reference.
|
||||
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
|
||||
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
|
||||
* @param acceptableOverheadRatio
|
||||
* how to trade space for speed. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
|
||||
* {@link Type#BYTES_VAR_SORTED}.
|
||||
* @param context I/O Context
|
||||
* @return a new {@link Writer} instance
|
||||
* @throws IOException
|
||||
* if the files for the writer can not be created.
|
||||
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
|
||||
*/
|
||||
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
|
||||
boolean fixedSize, Comparator<BytesRef> sortComparator,
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
|
||||
Counter bytesUsed, IOContext context, float acceptableOverheadRatio)
|
||||
throws IOException {
|
||||
// TODO -- i shouldn't have to specify fixed? can
|
||||
// track itself & do the write thing at write time?
|
||||
|
@ -139,7 +141,7 @@ public final class Bytes {
|
|||
} else if (mode == Mode.DEREF) {
|
||||
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
||||
} else if (mode == Mode.SORTED) {
|
||||
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
|
||||
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
|
||||
}
|
||||
} else {
|
||||
if (mode == Mode.STRAIGHT) {
|
||||
|
@ -147,7 +149,7 @@ public final class Bytes {
|
|||
} else if (mode == Mode.DEREF) {
|
||||
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
||||
} else if (mode == Mode.SORTED) {
|
||||
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
|
||||
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -382,32 +384,32 @@ public final class Bytes {
|
|||
protected int lastDocId = -1;
|
||||
protected int[] docToEntry;
|
||||
protected final BytesRefHash hash;
|
||||
protected final boolean fasterButMoreRam;
|
||||
protected final float acceptableOverheadRatio;
|
||||
protected long maxBytes = 0;
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
|
||||
int codecVersion, Counter bytesUsed, IOContext context, Type type)
|
||||
throws IOException {
|
||||
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, PackedInts.DEFAULT, type);
|
||||
}
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
|
||||
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
|
||||
int codecVersion, Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type)
|
||||
throws IOException {
|
||||
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, acceptableOverheadRatio, type);
|
||||
}
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
|
||||
Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) throws IOException {
|
||||
super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
|
||||
hash = new BytesRefHash(new ByteBlockPool(allocator),
|
||||
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||
docToEntry = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
this.fasterButMoreRam = fasterButMoreRam;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
}
|
||||
|
||||
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
|
||||
|
@ -506,7 +508,7 @@ public final class Bytes {
|
|||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, int[] addresses, int[] toEntry) throws IOException {
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
bitsRequired(maxValue));
|
||||
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
assert toEntry.length >= limit -1;
|
||||
|
@ -530,7 +532,7 @@ public final class Bytes {
|
|||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, long[] addresses, int[] toEntry) throws IOException {
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
bitsRequired(maxValue));
|
||||
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
assert toEntry.length >= limit -1;
|
||||
|
@ -550,11 +552,6 @@ public final class Bytes {
|
|||
}
|
||||
w.finish();
|
||||
}
|
||||
|
||||
protected int bitsRequired(long maxValue){
|
||||
return fasterButMoreRam ?
|
||||
PackedInts.getNextFixedSize(PackedInts.bitsRequired(maxValue)) : PackedInts.bitsRequired(maxValue);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Abstract base class for PerDocConsumer implementations
|
||||
|
@ -41,7 +42,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
|||
protected final String segmentName;
|
||||
private final Counter bytesUsed;
|
||||
protected final IOContext context;
|
||||
private final boolean fasterButMoreRam;
|
||||
private final float acceptableOverheadRatio;
|
||||
|
||||
/**
|
||||
* Filename extension for index files
|
||||
|
@ -57,20 +58,22 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
|||
* @param state The state to initiate a {@link PerDocConsumer} instance
|
||||
*/
|
||||
protected DocValuesWriterBase(PerDocWriteState state) {
|
||||
this(state, true);
|
||||
this(state, PackedInts.FAST);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param state The state to initiate a {@link PerDocConsumer} instance
|
||||
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
|
||||
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
|
||||
* @param acceptableOverheadRatio
|
||||
* how to trade space for speed. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
|
||||
* {@link Type#BYTES_VAR_SORTED}.
|
||||
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
|
||||
*/
|
||||
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) {
|
||||
protected DocValuesWriterBase(PerDocWriteState state, float acceptableOverheadRatio) {
|
||||
this.segmentName = state.segmentName;
|
||||
this.bytesUsed = state.bytesUsed;
|
||||
this.context = state.context;
|
||||
this.fasterButMoreRam = fasterButMoreRam;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
}
|
||||
|
||||
protected abstract Directory getDirectory() throws IOException;
|
||||
|
@ -83,7 +86,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
|||
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
|
||||
return Writer.create(valueType,
|
||||
PerDocProducerBase.docValuesId(segmentName, field.number),
|
||||
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
|
||||
getDirectory(), getComparator(), bytesUsed, context, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -58,8 +58,8 @@ class FixedSortedBytesImpl {
|
|||
private final Comparator<BytesRef> comp;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
|
||||
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
|
||||
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_FIXED_SORTED);
|
||||
this.comp = comp;
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@ class FixedSortedBytesImpl {
|
|||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
idxOut.writeInt(maxOrd);
|
||||
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
|
||||
PackedInts.bitsRequired(maxOrd));
|
||||
PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
|
||||
for (SortedSourceSlice slice : slices) {
|
||||
slice.writeOrds(ordsWriter);
|
||||
}
|
||||
|
|
|
@ -103,7 +103,7 @@ class PackedIntValues {
|
|||
: ++maxValue - minValue;
|
||||
datOut.writeLong(defaultValue);
|
||||
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue - minValue));
|
||||
PackedInts.bitsRequired(maxValue - minValue), PackedInts.DEFAULT);
|
||||
for (int i = 0; i < lastDocID + 1; i++) {
|
||||
set(bytesRef, i);
|
||||
byte[] bytes = bytesRef.bytes;
|
||||
|
|
|
@ -60,8 +60,8 @@ final class VarSortedBytesImpl {
|
|||
private final Comparator<BytesRef> comp;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
|
||||
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
|
||||
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_VAR_SORTED);
|
||||
this.comp = comp;
|
||||
size = 0;
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ final class VarSortedBytesImpl {
|
|||
|
||||
idxOut.writeLong(maxBytes);
|
||||
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
|
||||
PackedInts.bitsRequired(maxBytes));
|
||||
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
|
||||
offsetWriter.add(0);
|
||||
for (int i = 0; i < maxOrd; i++) {
|
||||
offsetWriter.add(offsets[i]);
|
||||
|
@ -91,7 +91,7 @@ final class VarSortedBytesImpl {
|
|||
offsetWriter.finish();
|
||||
|
||||
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
|
||||
PackedInts.bitsRequired(maxOrd-1));
|
||||
PackedInts.bitsRequired(maxOrd-1), PackedInts.DEFAULT);
|
||||
for (SortedSourceSlice slice : slices) {
|
||||
slice.writeOrds(ordsWriter);
|
||||
}
|
||||
|
@ -127,7 +127,7 @@ final class VarSortedBytesImpl {
|
|||
// total bytes of data
|
||||
idxOut.writeLong(maxBytes);
|
||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
|
||||
bitsRequired(maxBytes));
|
||||
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
|
||||
// first dump bytes data, recording index & write offset as
|
||||
// we go
|
||||
final BytesRef spare = new BytesRef();
|
||||
|
|
|
@ -198,7 +198,7 @@ class VarStraightBytesImpl {
|
|||
if (lastDocID == -1) {
|
||||
idxOut.writeVLong(0);
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
||||
PackedInts.bitsRequired(0));
|
||||
PackedInts.bitsRequired(0), PackedInts.DEFAULT);
|
||||
// docCount+1 so we write sentinel
|
||||
for (int i = 0; i < docCount+1; i++) {
|
||||
w.add(0);
|
||||
|
@ -208,7 +208,7 @@ class VarStraightBytesImpl {
|
|||
fill(docCount, address);
|
||||
idxOut.writeVLong(address);
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
||||
PackedInts.bitsRequired(address));
|
||||
PackedInts.bitsRequired(address), PackedInts.DEFAULT);
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
w.add(docToAddress[i]);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Abstract API for per-document stored primitive values of type <tt>byte[]</tt>
|
||||
|
@ -77,14 +78,16 @@ abstract class Writer extends DocValuesConsumer {
|
|||
* the {@link Directory} to create the files from.
|
||||
* @param bytesUsed
|
||||
* a byte-usage tracking reference
|
||||
* @param fasterButMoreRam Whether the space used for packed ints should be rounded up for higher lookup performance.
|
||||
* Currently this parameter only applies for types {@link Type#BYTES_VAR_SORTED}
|
||||
* and {@link Type#BYTES_FIXED_SORTED}.
|
||||
* @param acceptableOverheadRatio
|
||||
* how to trade space for speed. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
|
||||
* {@link Type#BYTES_VAR_SORTED}.
|
||||
* @return a new {@link Writer} instance for the given {@link Type}
|
||||
* @throws IOException
|
||||
* @see PackedInts#getReader(org.apache.lucene.store.DataInput, float)
|
||||
*/
|
||||
public static DocValuesConsumer create(Type type, String id, Directory directory,
|
||||
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
|
||||
if (comp == null) {
|
||||
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
@ -101,22 +104,22 @@ abstract class Writer extends DocValuesConsumer {
|
|||
return Floats.getWriter(directory, id, bytesUsed, context, type);
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
bytesUsed, context, acceptableOverheadRatio);
|
||||
case BYTES_FIXED_DEREF:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
bytesUsed, context, acceptableOverheadRatio);
|
||||
case BYTES_FIXED_SORTED:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
bytesUsed, context, acceptableOverheadRatio);
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
bytesUsed, context, acceptableOverheadRatio);
|
||||
case BYTES_VAR_DEREF:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
bytesUsed, context, acceptableOverheadRatio);
|
||||
case BYTES_VAR_SORTED:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
bytesUsed, context, acceptableOverheadRatio);
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown Values: " + type);
|
||||
}
|
||||
|
|
|
@ -494,7 +494,7 @@ public interface FieldCache {
|
|||
* faster lookups (default is "true"). Note that the
|
||||
* first call for a given reader and field "wins",
|
||||
* subsequent calls will share the same cache entry. */
|
||||
public DocTerms getTerms (AtomicReader reader, String field, boolean fasterButMoreRAM)
|
||||
public DocTerms getTerms (AtomicReader reader, String field, float acceptableOverheadRatio)
|
||||
throws IOException;
|
||||
|
||||
/** Returned by {@link #getTermsIndex} */
|
||||
|
@ -571,7 +571,7 @@ public interface FieldCache {
|
|||
* faster lookups (default is "true"). Note that the
|
||||
* first call for a given reader and field "wins",
|
||||
* subsequent calls will share the same cache entry. */
|
||||
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, boolean fasterButMoreRAM)
|
||||
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, float acceptableOverheadRatio)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
|
|
|
@ -1071,14 +1071,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
}
|
||||
}
|
||||
|
||||
private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true;
|
||||
|
||||
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
|
||||
return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
|
||||
return getTermsIndex(reader, field, PackedInts.FAST);
|
||||
}
|
||||
|
||||
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException {
|
||||
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false);
|
||||
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
|
||||
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
|
||||
}
|
||||
|
||||
static class DocTermsIndexCache extends Cache {
|
||||
|
@ -1092,7 +1090,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
|
||||
Terms terms = reader.terms(entryKey.field);
|
||||
|
||||
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
|
||||
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
|
||||
|
||||
final PagedBytes bytes = new PagedBytes(15);
|
||||
|
||||
|
@ -1142,8 +1140,8 @@ class FieldCacheImpl implements FieldCache {
|
|||
startNumUniqueTerms = 1;
|
||||
}
|
||||
|
||||
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
|
||||
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, fasterButMoreRAM);
|
||||
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
|
||||
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
|
||||
|
||||
// 0 is reserved for "unset"
|
||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||
|
@ -1219,11 +1217,11 @@ class FieldCacheImpl implements FieldCache {
|
|||
// TODO: if DocTermsIndex was already created, we
|
||||
// should share it...
|
||||
public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
|
||||
return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
|
||||
return getTerms(reader, field, PackedInts.FAST);
|
||||
}
|
||||
|
||||
public DocTerms getTerms(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException {
|
||||
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false);
|
||||
public DocTerms getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
|
||||
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
|
||||
}
|
||||
|
||||
static final class DocTermsCache extends Cache {
|
||||
|
@ -1237,7 +1235,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
|
||||
Terms terms = reader.terms(entryKey.field);
|
||||
|
||||
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
|
||||
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
|
||||
|
||||
final int termCountHardLimit = reader.maxDoc();
|
||||
|
||||
|
@ -1268,7 +1266,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
startBPV = 1;
|
||||
}
|
||||
|
||||
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);
|
||||
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), acceptableOverheadRatio);
|
||||
|
||||
// pointer==0 means not set
|
||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
final class DirectPacked64SingleBlockReader extends PackedInts.ReaderImpl {
|
||||
|
||||
private final IndexInput in;
|
||||
private final long startPointer;
|
||||
private final int valuesPerBlock;
|
||||
private final long mask;
|
||||
|
||||
DirectPacked64SingleBlockReader(int bitsPerValue, int valueCount,
|
||||
IndexInput in) {
|
||||
super(valueCount, bitsPerValue);
|
||||
this.in = in;
|
||||
startPointer = in.getFilePointer();
|
||||
valuesPerBlock = 64 / bitsPerValue;
|
||||
mask = ~(~0L << bitsPerValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
final int blockOffset = index / valuesPerBlock;
|
||||
final long skip = ((long) blockOffset) << 3;
|
||||
try {
|
||||
in.seek(startPointer + skip);
|
||||
|
||||
long block = in.readLong();
|
||||
final int offsetInBlock = index % valuesPerBlock;
|
||||
return (block >>> (offsetInBlock * bitsPerValue)) & mask;
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException("failed", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
|
|||
import java.io.IOException;
|
||||
|
||||
/* Reads directly from disk on each get */
|
||||
final class DirectReader implements PackedInts.Reader {
|
||||
final class DirectPackedReader extends PackedInts.ReaderImpl {
|
||||
private final IndexInput in;
|
||||
private final long startPointer;
|
||||
private final int bitsPerValue;
|
||||
private final int valueCount;
|
||||
|
||||
private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
|
||||
private static final int MOD_MASK = Packed64.MOD_MASK;
|
||||
|
@ -34,10 +32,9 @@ final class DirectReader implements PackedInts.Reader {
|
|||
// masks[n-1] masks for bottom n bits
|
||||
private final long[] masks;
|
||||
|
||||
public DirectReader(int bitsPerValue, int valueCount, IndexInput in)
|
||||
public DirectPackedReader(int bitsPerValue, int valueCount, IndexInput in)
|
||||
throws IOException {
|
||||
this.valueCount = valueCount;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
super(valueCount, bitsPerValue);
|
||||
this.in = in;
|
||||
|
||||
long v = 1;
|
||||
|
@ -50,26 +47,6 @@ final class DirectReader implements PackedInts.Reader {
|
|||
startPointer = in.getFilePointer();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
final long majorBitPos = (long)index * bitsPerValue;
|
|
@ -28,22 +28,14 @@ public class GrowableWriter implements PackedInts.Mutable {
|
|||
|
||||
private long currentMaxValue;
|
||||
private PackedInts.Mutable current;
|
||||
private final boolean roundFixedSize;
|
||||
private final float acceptableOverheadRatio;
|
||||
|
||||
public GrowableWriter(int startBitsPerValue, int valueCount, boolean roundFixedSize) {
|
||||
this.roundFixedSize = roundFixedSize;
|
||||
current = PackedInts.getMutable(valueCount, getSize(startBitsPerValue));
|
||||
public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
|
||||
currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
|
||||
}
|
||||
|
||||
private final int getSize(int bpv) {
|
||||
if (roundFixedSize) {
|
||||
return PackedInts.getNextFixedSize(bpv);
|
||||
} else {
|
||||
return bpv;
|
||||
}
|
||||
}
|
||||
|
||||
public long get(int index) {
|
||||
return current.get(index);
|
||||
}
|
||||
|
@ -78,7 +70,7 @@ public class GrowableWriter implements PackedInts.Mutable {
|
|||
currentMaxValue *= 2;
|
||||
}
|
||||
final int valueCount = size();
|
||||
PackedInts.Mutable next = PackedInts.getMutable(valueCount, getSize(bpv));
|
||||
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bpv, acceptableOverheadRatio);
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
next.set(i, current.get(i));
|
||||
}
|
||||
|
@ -93,11 +85,12 @@ public class GrowableWriter implements PackedInts.Mutable {
|
|||
}
|
||||
|
||||
public GrowableWriter resize(int newSize) {
|
||||
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, roundFixedSize);
|
||||
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
|
||||
final int limit = Math.min(size(), newSize);
|
||||
for(int i=0;i<limit;i++) {
|
||||
next.set(i, get(i));
|
||||
}
|
||||
return next;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** 48 bitsPerValue backed by short[] */
|
||||
final class Packed16ThreeBlocks extends PackedInts.ReaderImpl
|
||||
implements PackedInts.Mutable {
|
||||
|
||||
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||
|
||||
private final short[] blocks;
|
||||
|
||||
Packed16ThreeBlocks(int valueCount) {
|
||||
super(valueCount, 48);
|
||||
if (valueCount > MAX_SIZE) {
|
||||
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||
}
|
||||
this.blocks = new short[3 * valueCount];
|
||||
}
|
||||
|
||||
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
blocks[i] = in.readShort();
|
||||
}
|
||||
final int mod = blocks.length % 4;
|
||||
if (mod != 0) {
|
||||
final int pad = 4 - mod;
|
||||
// round out long
|
||||
for (int i = 0; i < pad; i++) {
|
||||
in.readShort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
final int o = index * 3;
|
||||
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(int index, long value) {
|
||||
final int o = index * 3;
|
||||
blocks[o] = (short) (value >> 32);
|
||||
blocks[o+1] = (short) (value >> 16);
|
||||
blocks[o+2] = (short) value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
Arrays.fill(blocks, (short) 0);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.sizeOf(blocks);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||
}
|
||||
}
|
|
@ -1,227 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Space optimized random access capable array of values with a fixed number of
|
||||
* bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher
|
||||
* numbers.
|
||||
* </p><p>
|
||||
* The implementation strives to avoid conditionals and expensive operations,
|
||||
* sacrificing code clarity to achieve better performance.
|
||||
*/
|
||||
|
||||
class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
|
||||
static final int BLOCK_SIZE = 32; // 32 = int, 64 = long
|
||||
static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE
|
||||
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
|
||||
|
||||
private static final int ENTRY_SIZE = BLOCK_SIZE + 1;
|
||||
private static final int FAC_BITPOS = 3;
|
||||
|
||||
/*
|
||||
* In order to make an efficient value-getter, conditionals should be
|
||||
* avoided. A value can be positioned inside of a block, requiring shifting
|
||||
* left or right or it can span two blocks, requiring a left-shift on the
|
||||
* first block and a right-shift on the right block.
|
||||
* </p><p>
|
||||
* By always shifting the first block both left and right, we get exactly
|
||||
* the right bits. By always shifting the second block right and applying
|
||||
* a mask, we get the right bits there. After that, we | the two bitsets.
|
||||
*/
|
||||
private static final int[][] SHIFTS =
|
||||
new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
|
||||
private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE];
|
||||
|
||||
static { // Generate shifts
|
||||
for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
|
||||
for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
|
||||
int[] currentShifts = SHIFTS[elementBits];
|
||||
int base = bitPos * FAC_BITPOS;
|
||||
currentShifts[base ] = bitPos;
|
||||
currentShifts[base + 1] = BLOCK_SIZE - elementBits;
|
||||
if (bitPos <= BLOCK_SIZE - elementBits) { // Single block
|
||||
currentShifts[base + 2] = 0;
|
||||
MASKS[elementBits][bitPos] = 0;
|
||||
} else { // Two blocks
|
||||
int rBits = elementBits - (BLOCK_SIZE - bitPos);
|
||||
currentShifts[base + 2] = BLOCK_SIZE - rBits;
|
||||
MASKS[elementBits][bitPos] = ~(~0 << rBits);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The setter requires more masking than the getter.
|
||||
*/
|
||||
private static final int[][] WRITE_MASKS =
|
||||
new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
|
||||
static {
|
||||
for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
|
||||
int elementPosMask = ~(~0 << elementBits);
|
||||
int[] currentShifts = SHIFTS[elementBits];
|
||||
int[] currentMasks = WRITE_MASKS[elementBits];
|
||||
for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
|
||||
int base = bitPos * FAC_BITPOS;
|
||||
currentMasks[base ] =~((elementPosMask
|
||||
<< currentShifts[base + 1])
|
||||
>>> currentShifts[base]);
|
||||
if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used
|
||||
currentMasks[base+1] = ~0; // Keep all bits
|
||||
currentMasks[base+2] = 0; // Or with 0
|
||||
} else {
|
||||
currentMasks[base+1] = ~(elementPosMask
|
||||
<< currentShifts[base + 2]);
|
||||
currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* The bits */
|
||||
private int[] blocks;
|
||||
|
||||
// Cached calculations
|
||||
private int maxPos; // blocks.length * BLOCK_SIZE / bitsPerValue - 1
|
||||
private int[] shifts; // The shifts for the current bitsPerValue
|
||||
private int[] readMasks;
|
||||
private int[] writeMasks;
|
||||
|
||||
/**
|
||||
* Creates an array with the internal structures adjusted for the given
|
||||
* limits and initialized to 0.
|
||||
* @param valueCount the number of elements.
|
||||
* @param bitsPerValue the number of bits available for any given value.
|
||||
* Note: bitsPerValue >32 is not supported by this implementation.
|
||||
*/
|
||||
public Packed32(int valueCount, int bitsPerValue) {
|
||||
this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)],
|
||||
valueCount, bitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an array with content retrieved from the given DataInput.
|
||||
* @param in a DataInput, positioned at the start of Packed64-content.
|
||||
* @param valueCount the number of elements.
|
||||
* @param bitsPerValue the number of bits available for any given value.
|
||||
* @throws java.io.IOException if the values for the backing array could not
|
||||
* be retrieved.
|
||||
*/
|
||||
public Packed32(DataInput in, int valueCount, int bitsPerValue)
|
||||
throws IOException {
|
||||
super(valueCount, bitsPerValue);
|
||||
int size = size(bitsPerValue, valueCount);
|
||||
blocks = new int[size + 1]; // +1 due to non-conditional tricks
|
||||
// TODO: find a faster way to bulk-read ints...
|
||||
for(int i = 0 ; i < size ; i++) {
|
||||
blocks[i] = in.readInt();
|
||||
}
|
||||
if (size % 2 == 1) {
|
||||
in.readInt(); // Align to long
|
||||
}
|
||||
updateCached();
|
||||
}
|
||||
|
||||
private static int size(int bitsPerValue, int valueCount) {
|
||||
final long totBitCount = (long) valueCount * bitsPerValue;
|
||||
return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates an array backed by the given blocks.
|
||||
* </p><p>
|
||||
* Note: The blocks are used directly, so changes to the given block will
|
||||
* affect the Packed32-structure.
|
||||
* @param blocks used as the internal backing array.
|
||||
* @param valueCount the number of values.
|
||||
* @param bitsPerValue the number of bits available for any given value.
|
||||
* Note: bitsPerValue >32 is not supported by this implementation.
|
||||
*/
|
||||
public Packed32(int[] blocks, int valueCount, int bitsPerValue) {
|
||||
// TODO: Check that blocks.length is sufficient for holding length values
|
||||
super(valueCount, bitsPerValue);
|
||||
if (bitsPerValue > 31) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"This array only supports values of 31 bits or less. The "
|
||||
+ "required number of bits was %d. The Packed64 "
|
||||
+ "implementation allows values with more than 31 bits",
|
||||
bitsPerValue));
|
||||
}
|
||||
this.blocks = blocks;
|
||||
updateCached();
|
||||
}
|
||||
|
||||
private void updateCached() {
|
||||
readMasks = MASKS[bitsPerValue];
|
||||
maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2);
|
||||
shifts = SHIFTS[bitsPerValue];
|
||||
writeMasks = WRITE_MASKS[bitsPerValue];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param index the position of the value.
|
||||
* @return the value at the given index.
|
||||
*/
|
||||
public long get(final int index) {
|
||||
assert index >= 0 && index < size();
|
||||
final long majorBitPos = (long)index * bitsPerValue;
|
||||
final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
|
||||
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
|
||||
|
||||
final int base = bitPos * FAC_BITPOS;
|
||||
|
||||
return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
|
||||
((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
final int intValue = (int)value;
|
||||
final long majorBitPos = (long)index * bitsPerValue;
|
||||
final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
|
||||
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
|
||||
final int base = bitPos * FAC_BITPOS;
|
||||
|
||||
blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base])
|
||||
| (intValue << shifts[base + 1] >>> shifts[base]);
|
||||
blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1])
|
||||
| ((intValue << shifts[base + 2])
|
||||
& writeMasks[base+2]);
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
Arrays.fill(blocks, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos
|
||||
+ ", elements.length=" + blocks.length + ")";
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.sizeOf(blocks);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,365 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class is similar to {@link Packed64} except that it trades space for
|
||||
* speed by ensuring that a single block needs to be read/written in order to
|
||||
* read/write a value.
|
||||
*/
|
||||
abstract class Packed64SingleBlock extends PackedInts.ReaderImpl
|
||||
implements PackedInts.Mutable {
|
||||
|
||||
private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {1, 2, 3, 4,
|
||||
5, 6, 7, 9, 10, 12, 21};
|
||||
private static final long[][] WRITE_MASKS = new long[22][];
|
||||
private static final int[][] SHIFTS = new int[22][];
|
||||
static {
|
||||
for (int bpv : SUPPORTED_BITS_PER_VALUE) {
|
||||
initMasks(bpv);
|
||||
}
|
||||
}
|
||||
|
||||
protected static void initMasks(int bpv) {
|
||||
int valuesPerBlock = Long.SIZE / bpv;
|
||||
long[] writeMasks = new long[valuesPerBlock];
|
||||
int[] shifts = new int[valuesPerBlock];
|
||||
long bits = (1L << bpv) - 1;
|
||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
||||
shifts[i] = bpv * i;
|
||||
writeMasks[i] = ~(bits << shifts[i]);
|
||||
}
|
||||
WRITE_MASKS[bpv] = writeMasks;
|
||||
SHIFTS[bpv] = shifts;
|
||||
}
|
||||
|
||||
public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {
|
||||
switch (bitsPerValue) {
|
||||
case 1:
|
||||
return new Packed64SingleBlock1(valueCount);
|
||||
case 2:
|
||||
return new Packed64SingleBlock2(valueCount);
|
||||
case 3:
|
||||
return new Packed64SingleBlock3(valueCount);
|
||||
case 4:
|
||||
return new Packed64SingleBlock4(valueCount);
|
||||
case 5:
|
||||
return new Packed64SingleBlock5(valueCount);
|
||||
case 6:
|
||||
return new Packed64SingleBlock6(valueCount);
|
||||
case 7:
|
||||
return new Packed64SingleBlock7(valueCount);
|
||||
case 9:
|
||||
return new Packed64SingleBlock9(valueCount);
|
||||
case 10:
|
||||
return new Packed64SingleBlock10(valueCount);
|
||||
case 12:
|
||||
return new Packed64SingleBlock12(valueCount);
|
||||
case 21:
|
||||
return new Packed64SingleBlock21(valueCount);
|
||||
default:
|
||||
throw new IllegalArgumentException("Unsupported bitsPerValue: "
|
||||
+ bitsPerValue);
|
||||
}
|
||||
}
|
||||
|
||||
public static Packed64SingleBlock create(DataInput in,
|
||||
int valueCount, int bitsPerValue) throws IOException {
|
||||
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
|
||||
for (int i = 0; i < reader.blocks.length; ++i) {
|
||||
reader.blocks[i] = in.readLong();
|
||||
}
|
||||
return reader;
|
||||
}
|
||||
|
||||
public static boolean isSupported(int bitsPerValue) {
|
||||
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
||||
}
|
||||
|
||||
public static float overheadPerValue(int bitsPerValue) {
|
||||
int valuesPerBlock = 64 / bitsPerValue;
|
||||
int overhead = 64 % bitsPerValue;
|
||||
return (float) overhead / valuesPerBlock;
|
||||
}
|
||||
|
||||
protected final long[] blocks;
|
||||
protected final int valuesPerBlock;
|
||||
protected final int[] shifts;
|
||||
protected final long[] writeMasks;
|
||||
protected final long readMask;
|
||||
|
||||
Packed64SingleBlock(int valueCount, int bitsPerValue) {
|
||||
super(valueCount, bitsPerValue);
|
||||
valuesPerBlock = Long.SIZE / bitsPerValue;
|
||||
blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
|
||||
shifts = SHIFTS[bitsPerValue];
|
||||
writeMasks = WRITE_MASKS[bitsPerValue];
|
||||
readMask = ~writeMasks[0];
|
||||
}
|
||||
|
||||
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
||||
return valueCount / valuesPerBlock
|
||||
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / valuesPerBlock;
|
||||
}
|
||||
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % valuesPerBlock;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
final int o = blockOffset(index);
|
||||
final int b = offsetInBlock(index);
|
||||
|
||||
return (blocks[o] >> shifts[b]) & readMask;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(int index, long value) {
|
||||
final int o = blockOffset(index);
|
||||
final int b = offsetInBlock(index);
|
||||
|
||||
blocks[o] = (blocks[o] & writeMasks[b]) | (value << shifts[b]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
Arrays.fill(blocks, 0L);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.sizeOf(blocks);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||
}
|
||||
|
||||
// Specialisations that allow the JVM to optimize computation of the block
|
||||
// offset as well as the offset in block
|
||||
|
||||
static final class Packed64SingleBlock21 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock21(int valueCount) {
|
||||
super(valueCount, 21);
|
||||
assert valuesPerBlock == 3;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 3;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 3;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock12 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock12(int valueCount) {
|
||||
super(valueCount, 12);
|
||||
assert valuesPerBlock == 5;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 5;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 5;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock10 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock10(int valueCount) {
|
||||
super(valueCount, 10);
|
||||
assert valuesPerBlock == 6;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 6;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 6;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock9 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock9(int valueCount) {
|
||||
super(valueCount, 9);
|
||||
assert valuesPerBlock == 7;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 7;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 7;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock7 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock7(int valueCount) {
|
||||
super(valueCount, 7);
|
||||
assert valuesPerBlock == 9;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 9;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 9;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock6 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock6(int valueCount) {
|
||||
super(valueCount, 6);
|
||||
assert valuesPerBlock == 10;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 10;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 10;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock5 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock5(int valueCount) {
|
||||
super(valueCount, 5);
|
||||
assert valuesPerBlock == 12;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 12;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 12;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock4 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock4(int valueCount) {
|
||||
super(valueCount, 4);
|
||||
assert valuesPerBlock == 16;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset >> 4;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset & 15;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock3 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock3(int valueCount) {
|
||||
super(valueCount, 3);
|
||||
assert valuesPerBlock == 21;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset / 21;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset % 21;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock2 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock2(int valueCount) {
|
||||
super(valueCount, 2);
|
||||
assert valuesPerBlock == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset >> 5;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset & 31;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Packed64SingleBlock1 extends Packed64SingleBlock {
|
||||
|
||||
Packed64SingleBlock1(int valueCount) {
|
||||
super(valueCount, 1);
|
||||
assert valuesPerBlock == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int blockOffset(int offset) {
|
||||
return offset >> 6;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int offsetInBlock(int offset) {
|
||||
return offset & 63;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {
|
||||
|
||||
private long pending;
|
||||
private int shift;
|
||||
private final long mask;
|
||||
private int position;
|
||||
|
||||
Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
|
||||
throws IOException {
|
||||
super(valueCount, bitsPerValue, in);
|
||||
pending = 0;
|
||||
shift = 64;
|
||||
mask = ~(~0L << bitsPerValue);
|
||||
position = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long next() throws IOException {
|
||||
if (shift + bitsPerValue > 64) {
|
||||
pending = in.readLong();
|
||||
shift = 0;
|
||||
}
|
||||
final long next = (pending >>> shift) & mask;
|
||||
shift += bitsPerValue;
|
||||
++position;
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long advance(int ord) throws IOException {
|
||||
assert ord < valueCount : "ord must be less than valueCount";
|
||||
assert ord > position : "ord must be greater than the current position";
|
||||
|
||||
final int valuesPerBlock = 64 / bitsPerValue;
|
||||
final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
|
||||
final long targetBlock = ord / valuesPerBlock;
|
||||
final long blocksToSkip = targetBlock - nextBlock;
|
||||
if (blocksToSkip > 0) {
|
||||
final long skip = blocksToSkip << 3;
|
||||
final long filePointer = in.getFilePointer();
|
||||
|
||||
in.seek(filePointer + skip);
|
||||
shift = 64;
|
||||
|
||||
final int offsetInBlock = ord % valuesPerBlock;
|
||||
for (int i = 0; i < offsetInBlock; ++i) {
|
||||
next();
|
||||
}
|
||||
} else {
|
||||
for (int i = position; i < ord - 1; ++i) {
|
||||
next();
|
||||
}
|
||||
}
|
||||
|
||||
position = ord - 1;
|
||||
return next();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.packed.PackedInts.Writer;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link Writer} for {@link Packed64SingleBlock} readers.
|
||||
*/
|
||||
final class Packed64SingleBlockWriter extends Writer {
|
||||
|
||||
private long pending;
|
||||
private int shift;
|
||||
private int written;
|
||||
|
||||
Packed64SingleBlockWriter(DataOutput out, int valueCount,
|
||||
int bitsPerValue) throws IOException {
|
||||
super(out, valueCount, bitsPerValue);
|
||||
assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
|
||||
pending = 0;
|
||||
shift = 0;
|
||||
written = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getFormat() {
|
||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(long v) throws IOException {
|
||||
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
|
||||
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
|
||||
assert v >= 0;
|
||||
|
||||
if (shift + bitsPerValue > Long.SIZE) {
|
||||
out.writeLong(pending);
|
||||
pending = 0;
|
||||
shift = 0;
|
||||
}
|
||||
pending |= v << shift;
|
||||
shift += bitsPerValue;
|
||||
++written;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() throws IOException {
|
||||
while (written < valueCount) {
|
||||
add(0L); // Auto flush
|
||||
}
|
||||
|
||||
if (shift > 0) {
|
||||
// add was called at least once
|
||||
out.writeLong(pending);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Packed64SingleBlockWriter(written " + written + "/" + valueCount + " with "
|
||||
+ bitsPerValue + " bits/value)";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** 24 bitsPerValue backed by byte[] */
|
||||
final class Packed8ThreeBlocks extends PackedInts.ReaderImpl
|
||||
implements PackedInts.Mutable {
|
||||
|
||||
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||
|
||||
private final byte[] blocks;
|
||||
|
||||
Packed8ThreeBlocks(int valueCount) {
|
||||
super(valueCount, 24);
|
||||
if (valueCount > MAX_SIZE) {
|
||||
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||
}
|
||||
this.blocks = new byte[3 * valueCount];
|
||||
}
|
||||
|
||||
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
blocks[i] = in.readByte();
|
||||
}
|
||||
final int mod = blocks.length % 8;
|
||||
if (mod != 0) {
|
||||
final int pad = 8 - mod;
|
||||
// round out long
|
||||
for (int i = 0; i < pad; i++) {
|
||||
in.readByte();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
final int o = index * 3;
|
||||
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(int index, long value) {
|
||||
final int o = index * 3;
|
||||
blocks[o+2] = (byte) value;
|
||||
blocks[o+1] = (byte) (value >> 8);
|
||||
blocks[o] = (byte) (value >> 16);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
Arrays.fill(blocks, (byte) 0);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.sizeOf(blocks);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||
}
|
||||
|
||||
}
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.store.DataInput;
|
|||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -38,10 +37,33 @@ import java.io.IOException;
|
|||
|
||||
public class PackedInts {
|
||||
|
||||
/**
|
||||
* At most 700% memory overhead, always select a direct implementation.
|
||||
*/
|
||||
public static final float FASTEST = 7f;
|
||||
|
||||
/**
|
||||
* At most 50% memory overhead, always select a reasonably fast implementation.
|
||||
*/
|
||||
public static final float FAST = 0.5f;
|
||||
|
||||
/**
|
||||
* At most 20% memory overhead.
|
||||
*/
|
||||
public static final float DEFAULT = 0.2f;
|
||||
|
||||
/**
|
||||
* No memory overhead at all, but the returned implementation may be slow.
|
||||
*/
|
||||
public static final float COMPACT = 0f;
|
||||
|
||||
private final static String CODEC_NAME = "PackedInts";
|
||||
private final static int VERSION_START = 0;
|
||||
private final static int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static final int PACKED = 0;
|
||||
static final int PACKED_SINGLE_BLOCK = 1;
|
||||
|
||||
/**
|
||||
* A read-only random access array of positive integers.
|
||||
* @lucene.internal
|
||||
|
@ -103,7 +125,35 @@ public class PackedInts {
|
|||
* @throws IOException if reading the value throws an IOException*/
|
||||
long advance(int ord) throws IOException;
|
||||
}
|
||||
|
||||
|
||||
static abstract class ReaderIteratorImpl implements ReaderIterator {
|
||||
|
||||
protected final IndexInput in;
|
||||
protected final int bitsPerValue;
|
||||
protected final int valueCount;
|
||||
|
||||
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
|
||||
this.in = in;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
this.valueCount = valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A packed integer array that can be modified.
|
||||
* @lucene.internal
|
||||
|
@ -118,8 +168,7 @@ public class PackedInts {
|
|||
|
||||
/**
|
||||
* Sets all values to 0.
|
||||
*/
|
||||
|
||||
*/
|
||||
void clear();
|
||||
}
|
||||
|
||||
|
@ -145,10 +194,6 @@ public class PackedInts {
|
|||
return valueCount;
|
||||
}
|
||||
|
||||
public long getMaxValue() { // Convenience method
|
||||
return maxValue(bitsPerValue);
|
||||
}
|
||||
|
||||
public Object getArray() {
|
||||
return null;
|
||||
}
|
||||
|
@ -176,8 +221,10 @@ public class PackedInts {
|
|||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
out.writeVInt(bitsPerValue);
|
||||
out.writeVInt(valueCount);
|
||||
out.writeVInt(getFormat());
|
||||
}
|
||||
|
||||
protected abstract int getFormat();
|
||||
public abstract void add(long v) throws IOException;
|
||||
public abstract void finish() throws IOException;
|
||||
}
|
||||
|
@ -185,6 +232,7 @@ public class PackedInts {
|
|||
/**
|
||||
* Retrieve PackedInt data from the DataInput and return a packed int
|
||||
* structure based on it.
|
||||
*
|
||||
* @param in positioned at the beginning of a stored packed int structure.
|
||||
* @return a read only random access capable array of positive integers.
|
||||
* @throws IOException if the structure could not be retrieved.
|
||||
|
@ -195,22 +243,30 @@ public class PackedInts {
|
|||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final int format = in.readVInt();
|
||||
|
||||
switch (bitsPerValue) {
|
||||
case 8:
|
||||
return new Direct8(in, valueCount);
|
||||
case 16:
|
||||
return new Direct16(in, valueCount);
|
||||
case 32:
|
||||
return new Direct32(in, valueCount);
|
||||
case 64:
|
||||
return new Direct64(in, valueCount);
|
||||
default:
|
||||
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
|
||||
return new Packed64(in, valueCount, bitsPerValue);
|
||||
} else {
|
||||
return new Packed32(in, valueCount, bitsPerValue);
|
||||
}
|
||||
switch (format) {
|
||||
case PACKED:
|
||||
switch (bitsPerValue) {
|
||||
case 8:
|
||||
return new Direct8(in, valueCount);
|
||||
case 16:
|
||||
return new Direct16(in, valueCount);
|
||||
case 24:
|
||||
return new Packed8ThreeBlocks(in, valueCount);
|
||||
case 32:
|
||||
return new Direct32(in, valueCount);
|
||||
case 48:
|
||||
return new Packed16ThreeBlocks(in, valueCount);
|
||||
case 64:
|
||||
return new Direct64(in, valueCount);
|
||||
default:
|
||||
return new Packed64(in, valueCount, bitsPerValue);
|
||||
}
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
|
||||
default:
|
||||
throw new AssertionError("Unknwown Writer format: " + format);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -226,7 +282,15 @@ public class PackedInts {
|
|||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
return new PackedReaderIterator(bitsPerValue, valueCount, in);
|
||||
final int format = in.readVInt();
|
||||
switch (format) {
|
||||
case PACKED:
|
||||
return new PackedReaderIterator(valueCount, bitsPerValue, in);
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
|
||||
default:
|
||||
throw new AssertionError("Unknwown Writer format: " + format);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -243,38 +307,70 @@ public class PackedInts {
|
|||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
return new DirectReader(bitsPerValue, valueCount, in);
|
||||
final int format = in.readVInt();
|
||||
switch (format) {
|
||||
case PACKED:
|
||||
return new DirectPackedReader(bitsPerValue, valueCount, in);
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
|
||||
default:
|
||||
throw new AssertionError("Unknwown Writer format: " + format);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a packed integer array with the given amount of values initialized
|
||||
* to 0. The valueCount and the bitsPerValue cannot be changed after creation.
|
||||
* All Mutables known by this factory are kept fully in RAM.
|
||||
* @param valueCount the number of elements.
|
||||
* @param bitsPerValue the number of bits available for any given value.
|
||||
* @return a mutable packed integer array.
|
||||
*
|
||||
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||
* for speed by selecting a faster but potentially less memory-efficient
|
||||
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
||||
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
||||
* that the fastest implementation is selected.
|
||||
*
|
||||
* @param valueCount the number of elements
|
||||
* @param bitsPerValue the number of bits available for any given value
|
||||
* @param acceptableOverheadRatio an acceptable overhead
|
||||
* ratio per value
|
||||
* @return a mutable packed integer array
|
||||
* @throws java.io.IOException if the Mutable could not be created. With the
|
||||
* current implementations, this never happens, but the method
|
||||
* signature allows for future persistence-backed Mutables.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Mutable getMutable(
|
||||
int valueCount, int bitsPerValue) {
|
||||
switch (bitsPerValue) {
|
||||
case 8:
|
||||
public static Mutable getMutable(int valueCount,
|
||||
int bitsPerValue, float acceptableOverheadRatio) {
|
||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||
|
||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||
|
||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||
return new Direct8(valueCount);
|
||||
case 16:
|
||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||
return new Direct16(valueCount);
|
||||
case 32:
|
||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||
return new Direct32(valueCount);
|
||||
case 64:
|
||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||
return new Direct64(valueCount);
|
||||
default:
|
||||
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
|
||||
return new Packed64(valueCount, bitsPerValue);
|
||||
} else {
|
||||
return new Packed32(valueCount, bitsPerValue);
|
||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||
return new Packed8ThreeBlocks(valueCount);
|
||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||
return new Packed16ThreeBlocks(valueCount);
|
||||
} else {
|
||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||
if (overhead <= acceptableOverhead) {
|
||||
return Packed64SingleBlock.create(valueCount, bpv);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Packed64(valueCount, bitsPerValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -282,16 +378,55 @@ public class PackedInts {
|
|||
* Create a packed integer array writer for the given number of values at the
|
||||
* given bits/value. Writers append to the given IndexOutput and have very
|
||||
* low memory overhead.
|
||||
*
|
||||
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||
* for speed by selecting a faster but potentially less memory-efficient
|
||||
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
||||
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
||||
* that the fastest implementation is selected.
|
||||
*
|
||||
* @param out the destination for the produced bits.
|
||||
* @param valueCount the number of elements.
|
||||
* @param bitsPerValue the number of bits available for any given value.
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||
* @return a Writer ready for receiving values.
|
||||
* @throws IOException if bits could not be written to out.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue)
|
||||
public static Writer getWriter(DataOutput out,
|
||||
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
|
||||
throws IOException {
|
||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||
|
||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||
|
||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||
return new PackedWriter(out, valueCount, 8);
|
||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||
return new PackedWriter(out, valueCount, 16);
|
||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||
return new PackedWriter(out, valueCount, 32);
|
||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||
return new PackedWriter(out, valueCount, 64);
|
||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||
return new PackedWriter(out, valueCount, 24);
|
||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||
} else {
|
||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||
if (overhead <= acceptableOverhead) {
|
||||
return new Packed64SingleBlockWriter(out, valueCount, bpv);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns how many bits are required to hold values up
|
||||
|
@ -301,14 +436,10 @@ public class PackedInts {
|
|||
* @lucene.internal
|
||||
*/
|
||||
public static int bitsRequired(long maxValue) {
|
||||
// Very high long values does not translate well to double, so we do an
|
||||
// explicit check for the edge cases
|
||||
if (maxValue > 0x3FFFFFFFFFFFFFFFL) {
|
||||
return 63;
|
||||
} if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
|
||||
return 62;
|
||||
if (maxValue < 0) {
|
||||
throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
|
||||
}
|
||||
return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
|
||||
return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -321,26 +452,4 @@ public class PackedInts {
|
|||
public static long maxValue(int bitsPerValue) {
|
||||
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
|
||||
}
|
||||
|
||||
/** Rounds bitsPerValue up to 8, 16, 32 or 64. */
|
||||
public static int getNextFixedSize(int bitsPerValue) {
|
||||
if (bitsPerValue <= 8) {
|
||||
return 8;
|
||||
} else if (bitsPerValue <= 16) {
|
||||
return 16;
|
||||
} else if (bitsPerValue <= 32) {
|
||||
return 32;
|
||||
} else {
|
||||
return 64;
|
||||
}
|
||||
}
|
||||
|
||||
/** Possibly wastes some storage in exchange for faster lookups */
|
||||
public static int getRoundedFixedSize(int bitsPerValue) {
|
||||
if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
|
||||
return getNextFixedSize(bitsPerValue);
|
||||
} else {
|
||||
return bitsPerValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,24 +21,18 @@ import org.apache.lucene.store.IndexInput;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
final class PackedReaderIterator implements PackedInts.ReaderIterator {
|
||||
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
|
||||
private long pending;
|
||||
private int pendingBitsLeft;
|
||||
private final IndexInput in;
|
||||
private final int bitsPerValue;
|
||||
private final int valueCount;
|
||||
private int position = -1;
|
||||
|
||||
// masks[n-1] masks for bottom n bits
|
||||
private final long[] masks;
|
||||
|
||||
public PackedReaderIterator(int bitsPerValue, int valueCount, IndexInput in)
|
||||
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
|
||||
throws IOException {
|
||||
super(valueCount, bitsPerValue, in);
|
||||
|
||||
this.valueCount = valueCount;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
|
||||
this.in = in;
|
||||
masks = new long[bitsPerValue];
|
||||
|
||||
long v = 1;
|
||||
|
@ -48,14 +42,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
|
|||
}
|
||||
}
|
||||
|
||||
public int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
public long next() throws IOException {
|
||||
if (pendingBitsLeft == 0) {
|
||||
pending = in.readLong();
|
||||
|
@ -79,10 +65,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
|
|||
return result;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
in.close();
|
||||
}
|
||||
|
||||
public int ord() {
|
||||
return position;
|
||||
}
|
||||
|
|
|
@ -52,6 +52,11 @@ class PackedWriter extends PackedInts.Writer {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getFormat() {
|
||||
return PackedInts.PACKED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not call this after finish
|
||||
*/
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// TODO: some of this should be under lucene40 codec tests? is talking to codec directly?
|
||||
public class TestDocValues extends LuceneTestCase {
|
||||
|
@ -71,7 +72,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
Directory dir = newDirectory();
|
||||
final Counter trackBytes = Counter.newCounter();
|
||||
DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random()),
|
||||
random().nextBoolean());
|
||||
random().nextFloat() * PackedInts.FAST);
|
||||
int maxDoc = 220;
|
||||
final String[] values = new String[maxDoc];
|
||||
final int fixedLength = 1 + atLeast(50);
|
||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
public class TestIndexWriter extends LuceneTestCase {
|
||||
|
||||
|
@ -1677,7 +1678,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
w.close();
|
||||
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
|
||||
|
||||
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextBoolean());
|
||||
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
|
||||
assertEquals(5, dti.numOrd()); // +1 for null ord
|
||||
assertEquals(4, dti.size());
|
||||
assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef()));
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.util.packed;
|
|||
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.packed.PackedInts.Reader;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -53,10 +54,10 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
for(int nbits=1;nbits<63;nbits++) {
|
||||
final int valueCount = 100+random().nextInt(500);
|
||||
final Directory d = newDirectory();
|
||||
|
||||
|
||||
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
|
||||
PackedInts.Writer w = PackedInts.getWriter(
|
||||
out, valueCount, nbits);
|
||||
out, valueCount, nbits, random().nextFloat()*PackedInts.FASTEST);
|
||||
|
||||
final long[] values = new long[valueCount];
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
|
@ -188,16 +189,24 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
if (bitsPerValue <= 16) {
|
||||
packedInts.add(new Direct16(valueCount));
|
||||
}
|
||||
if (bitsPerValue <= 31) {
|
||||
packedInts.add(new Packed32(valueCount, bitsPerValue));
|
||||
if (bitsPerValue <= 24 && valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
|
||||
packedInts.add(new Packed8ThreeBlocks(valueCount));
|
||||
}
|
||||
if (bitsPerValue <= 32) {
|
||||
packedInts.add(new Direct32(valueCount));
|
||||
}
|
||||
if (bitsPerValue <= 48 && valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
|
||||
packedInts.add(new Packed16ThreeBlocks(valueCount));
|
||||
}
|
||||
if (bitsPerValue <= 63) {
|
||||
packedInts.add(new Packed64(valueCount, bitsPerValue));
|
||||
}
|
||||
packedInts.add(new Direct64(valueCount));
|
||||
for (int bpv = bitsPerValue; bpv <= 64; ++bpv) {
|
||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||
packedInts.add(Packed64SingleBlock.create(valueCount, bpv));
|
||||
}
|
||||
}
|
||||
return packedInts;
|
||||
}
|
||||
|
||||
|
@ -242,20 +251,26 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSingleValue() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexOutput out = dir.createOutput("out", newIOContext(random()));
|
||||
PackedInts.Writer w = PackedInts.getWriter(out, 1, 8);
|
||||
w.add(17);
|
||||
w.finish();
|
||||
final long end = out.getFilePointer();
|
||||
out.close();
|
||||
for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
|
||||
Directory dir = newDirectory();
|
||||
IndexOutput out = dir.createOutput("out", newIOContext(random()));
|
||||
PackedInts.Writer w = PackedInts.getWriter(out, 1, bitsPerValue, PackedInts.DEFAULT);
|
||||
long value = 17L & PackedInts.maxValue(bitsPerValue);
|
||||
w.add(value);
|
||||
w.finish();
|
||||
final long end = out.getFilePointer();
|
||||
out.close();
|
||||
|
||||
IndexInput in = dir.openInput("out", newIOContext(random()));
|
||||
PackedInts.getReader(in);
|
||||
assertEquals(end, in.getFilePointer());
|
||||
in.close();
|
||||
IndexInput in = dir.openInput("out", newIOContext(random()));
|
||||
Reader reader = PackedInts.getReader(in);
|
||||
String msg = "Impl=" + w.getClass().getSimpleName() + ", bitsPerValue=" + bitsPerValue;
|
||||
assertEquals(msg, 1, reader.size());
|
||||
assertEquals(msg, value, reader.get(0));
|
||||
assertEquals(msg, end, in.getFilePointer());
|
||||
in.close();
|
||||
|
||||
dir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testSecondaryBlockChange() throws IOException {
|
||||
|
@ -276,15 +291,36 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
int INDEX = (int)Math.pow(2, 30)+1;
|
||||
int BITS = 2;
|
||||
|
||||
Packed32 p32 = new Packed32(INDEX, BITS);
|
||||
p32.set(INDEX-1, 1);
|
||||
assertEquals("The value at position " + (INDEX-1)
|
||||
+ " should be correct for Packed32", 1, p32.get(INDEX-1));
|
||||
p32 = null; // To free the 256MB used
|
||||
|
||||
Packed64 p64 = new Packed64(INDEX, BITS);
|
||||
p64.set(INDEX-1, 1);
|
||||
assertEquals("The value at position " + (INDEX-1)
|
||||
+ " should be correct for Packed64", 1, p64.get(INDEX-1));
|
||||
p64 = null;
|
||||
|
||||
for (int bits = 1; bits <=64; ++bits) {
|
||||
if (Packed64SingleBlock.isSupported(bits)) {
|
||||
int index = Integer.MAX_VALUE / bits + (bits == 1 ? 0 : 1);
|
||||
Packed64SingleBlock p64sb = Packed64SingleBlock.create(index, bits);
|
||||
p64sb.set(index - 1, 1);
|
||||
assertEquals("The value at position " + (index-1)
|
||||
+ " should be correct for " + p64sb.getClass().getSimpleName(),
|
||||
1, p64sb.get(index-1));
|
||||
}
|
||||
}
|
||||
|
||||
int index = Integer.MAX_VALUE / 24 + 1;
|
||||
Packed8ThreeBlocks p8 = new Packed8ThreeBlocks(index);
|
||||
p8.set(index - 1, 1);
|
||||
assertEquals("The value at position " + (index-1)
|
||||
+ " should be correct for Packed8ThreeBlocks", 1, p8.get(index-1));
|
||||
p8 = null;
|
||||
|
||||
index = Integer.MAX_VALUE / 48 + 1;
|
||||
Packed16ThreeBlocks p16 = new Packed16ThreeBlocks(index);
|
||||
p16.set(index - 1, 1);
|
||||
assertEquals("The value at position " + (index-1)
|
||||
+ " should be correct for Packed16ThreeBlocks", 1, p16.get(index-1));
|
||||
p16 = null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
|||
import org.apache.lucene.search.FieldCache.DocTerms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Use a field value and find the Document Frequency within another field.
|
||||
|
@ -52,7 +53,7 @@ public class JoinDocFreqValueSource extends FieldCacheSource {
|
|||
@Override
|
||||
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
|
||||
{
|
||||
final DocTerms terms = cache.getTerms(readerContext.reader(), field, true );
|
||||
final DocTerms terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST);
|
||||
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
|
||||
|
||||
return new IntDocValues(this) {
|
||||
|
|
Loading…
Reference in New Issue