LUCENE-4062: add new aligned packed bits impls for faster performance

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1342751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-05-25 18:09:39 +00:00
parent f4819005cf
commit 6a4a717220
29 changed files with 1095 additions and 452 deletions

View File

@ -922,6 +922,11 @@ Optimizations
and few general improvements to DirectoryTaxonomyWriter. and few general improvements to DirectoryTaxonomyWriter.
(Shai Erera, Gilad Barkai) (Shai Erera, Gilad Barkai)
* LUCENE-4062: Add new aligned packed bits impls for faster lookup
performance; add float acceptableOverheadRatio to getWriter and
getMutable API to give packed ints freedom to pick faster
implementations (Adrien Grand via Mike McCandless)
Bug fixes Bug fixes
* LUCENE-2803: The FieldCache can miss values if an entry for a reader * LUCENE-2803: The FieldCache can miss values if an entry for a reader

View File

@ -328,8 +328,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// we'd have to try @ fewer bits and then grow // we'd have to try @ fewer bits and then grow
// if we overflowed it. // if we overflowed it.
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue()); PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue()); PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
termsDictOffsets = termsDictOffsetsM; termsDictOffsets = termsDictOffsetsM;
termOffsets = termOffsetsM; termOffsets = termOffsetsM;

View File

@ -183,7 +183,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
// write primary terms dict offsets // write primary terms dict offsets
packedIndexStart = out.getFilePointer(); packedIndexStart = out.getFilePointer();
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer)); PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT);
// relative to our indexStart // relative to our indexStart
long upto = 0; long upto = 0;
@ -196,7 +196,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
packedOffsetsStart = out.getFilePointer(); packedOffsetsStart = out.getFilePointer();
// write offsets into the byte[] terms // write offsets into the byte[] terms
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength)); w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength), PackedInts.DEFAULT);
upto = 0; upto = 0;
for(int i=0;i<numIndexTerms;i++) { for(int i=0;i<numIndexTerms;i++) {
w.add(upto); w.add(upto);

View File

@ -74,7 +74,7 @@ class TermInfosReaderIndex {
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput(); PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2); final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, false); GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);
String currentField = null; String currentField = null;
List<String> fieldStrs = new ArrayList<String>(); List<String> fieldStrs = new ArrayList<String>();

View File

@ -115,17 +115,19 @@ public final class Bytes {
* {@link Writer}. A call to {@link Writer#finish(int)} will release * {@link Writer}. A call to {@link Writer#finish(int)} will release
* all internally used resources and frees the memory tracking * all internally used resources and frees the memory tracking
* reference. * reference.
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes * @param acceptableOverheadRatio
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for * how to trade space for speed. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}. * docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @param context I/O Context * @param context I/O Context
* @return a new {@link Writer} instance * @return a new {@link Writer} instance
* @throws IOException * @throws IOException
* if the files for the writer can not be created. * if the files for the writer can not be created.
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
*/ */
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode, public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
boolean fixedSize, Comparator<BytesRef> sortComparator, boolean fixedSize, Comparator<BytesRef> sortComparator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) Counter bytesUsed, IOContext context, float acceptableOverheadRatio)
throws IOException { throws IOException {
// TODO -- i shouldn't have to specify fixed? can // TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time? // track itself & do the write thing at write time?
@ -139,7 +141,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) { } else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context); return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) { } else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam); return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
} }
} else { } else {
if (mode == Mode.STRAIGHT) { if (mode == Mode.STRAIGHT) {
@ -147,7 +149,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) { } else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context); return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) { } else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam); return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
} }
} }
@ -382,32 +384,32 @@ public final class Bytes {
protected int lastDocId = -1; protected int lastDocId = -1;
protected int[] docToEntry; protected int[] docToEntry;
protected final BytesRefHash hash; protected final BytesRefHash hash;
protected final boolean fasterButMoreRam; protected final float acceptableOverheadRatio;
protected long maxBytes = 0; protected long maxBytes = 0;
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int codecVersion, Counter bytesUsed, IOContext context, Type type) int codecVersion, Counter bytesUsed, IOContext context, Type type)
throws IOException { throws IOException {
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator( this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type); ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, PackedInts.DEFAULT, type);
} }
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) int codecVersion, Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type)
throws IOException { throws IOException {
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator( this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type); ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, acceptableOverheadRatio, type);
} }
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator, protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException { Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) throws IOException {
super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type); super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator), hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray( BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed)); BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
docToEntry = new int[1]; docToEntry = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
this.fasterButMoreRam = fasterButMoreRam; this.acceptableOverheadRatio = acceptableOverheadRatio;
} }
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes) protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
@ -506,7 +508,7 @@ public final class Bytes {
protected void writeIndex(IndexOutput idxOut, int docCount, protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, int[] addresses, int[] toEntry) throws IOException { long maxValue, int[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
bitsRequired(maxValue)); PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
final int limit = docCount > docToEntry.length ? docToEntry.length final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount; : docCount;
assert toEntry.length >= limit -1; assert toEntry.length >= limit -1;
@ -530,7 +532,7 @@ public final class Bytes {
protected void writeIndex(IndexOutput idxOut, int docCount, protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, long[] addresses, int[] toEntry) throws IOException { long maxValue, long[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
bitsRequired(maxValue)); PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
final int limit = docCount > docToEntry.length ? docToEntry.length final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount; : docCount;
assert toEntry.length >= limit -1; assert toEntry.length >= limit -1;
@ -551,11 +553,6 @@ public final class Bytes {
w.finish(); w.finish();
} }
protected int bitsRequired(long maxValue){
return fasterButMoreRam ?
PackedInts.getNextFixedSize(PackedInts.bitsRequired(maxValue)) : PackedInts.bitsRequired(maxValue);
}
} }
static abstract class BytesSortedSourceBase extends SortedSource { static abstract class BytesSortedSourceBase extends SortedSource {

View File

@ -31,6 +31,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter; import org.apache.lucene.util.Counter;
import org.apache.lucene.util.packed.PackedInts;
/** /**
* Abstract base class for PerDocConsumer implementations * Abstract base class for PerDocConsumer implementations
@ -41,7 +42,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
protected final String segmentName; protected final String segmentName;
private final Counter bytesUsed; private final Counter bytesUsed;
protected final IOContext context; protected final IOContext context;
private final boolean fasterButMoreRam; private final float acceptableOverheadRatio;
/** /**
* Filename extension for index files * Filename extension for index files
@ -57,20 +58,22 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
* @param state The state to initiate a {@link PerDocConsumer} instance * @param state The state to initiate a {@link PerDocConsumer} instance
*/ */
protected DocValuesWriterBase(PerDocWriteState state) { protected DocValuesWriterBase(PerDocWriteState state) {
this(state, true); this(state, PackedInts.FAST);
} }
/** /**
* @param state The state to initiate a {@link PerDocConsumer} instance * @param state The state to initiate a {@link PerDocConsumer} instance
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes * @param acceptableOverheadRatio
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for * how to trade space for speed. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}. * docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
*/ */
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) { protected DocValuesWriterBase(PerDocWriteState state, float acceptableOverheadRatio) {
this.segmentName = state.segmentName; this.segmentName = state.segmentName;
this.bytesUsed = state.bytesUsed; this.bytesUsed = state.bytesUsed;
this.context = state.context; this.context = state.context;
this.fasterButMoreRam = fasterButMoreRam; this.acceptableOverheadRatio = acceptableOverheadRatio;
} }
protected abstract Directory getDirectory() throws IOException; protected abstract Directory getDirectory() throws IOException;
@ -83,7 +86,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException { public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
return Writer.create(valueType, return Writer.create(valueType,
PerDocProducerBase.docValuesId(segmentName, field.number), PerDocProducerBase.docValuesId(segmentName, field.number),
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam); getDirectory(), getComparator(), bytesUsed, context, acceptableOverheadRatio);
} }

View File

@ -58,8 +58,8 @@ class FixedSortedBytesImpl {
private final Comparator<BytesRef> comp; private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp, public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException { Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED); super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_FIXED_SORTED);
this.comp = comp; this.comp = comp;
} }
@ -77,7 +77,7 @@ class FixedSortedBytesImpl {
final IndexOutput idxOut = getOrCreateIndexOut(); final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(maxOrd); idxOut.writeInt(maxOrd);
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length, final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd)); PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) { for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter); slice.writeOrds(ordsWriter);
} }

View File

@ -103,7 +103,7 @@ class PackedIntValues {
: ++maxValue - minValue; : ++maxValue - minValue;
datOut.writeLong(defaultValue); datOut.writeLong(defaultValue);
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
PackedInts.bitsRequired(maxValue - minValue)); PackedInts.bitsRequired(maxValue - minValue), PackedInts.DEFAULT);
for (int i = 0; i < lastDocID + 1; i++) { for (int i = 0; i < lastDocID + 1; i++) {
set(bytesRef, i); set(bytesRef, i);
byte[] bytes = bytesRef.bytes; byte[] bytes = bytesRef.bytes;

View File

@ -60,8 +60,8 @@ final class VarSortedBytesImpl {
private final Comparator<BytesRef> comp; private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp, public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException { Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED); super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_VAR_SORTED);
this.comp = comp; this.comp = comp;
size = 0; size = 0;
} }
@ -83,7 +83,7 @@ final class VarSortedBytesImpl {
idxOut.writeLong(maxBytes); idxOut.writeLong(maxBytes);
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1, final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
PackedInts.bitsRequired(maxBytes)); PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
offsetWriter.add(0); offsetWriter.add(0);
for (int i = 0; i < maxOrd; i++) { for (int i = 0; i < maxOrd; i++) {
offsetWriter.add(offsets[i]); offsetWriter.add(offsets[i]);
@ -91,7 +91,7 @@ final class VarSortedBytesImpl {
offsetWriter.finish(); offsetWriter.finish();
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length, final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd-1)); PackedInts.bitsRequired(maxOrd-1), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) { for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter); slice.writeOrds(ordsWriter);
} }
@ -127,7 +127,7 @@ final class VarSortedBytesImpl {
// total bytes of data // total bytes of data
idxOut.writeLong(maxBytes); idxOut.writeLong(maxBytes);
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1, PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
bitsRequired(maxBytes)); PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
// first dump bytes data, recording index & write offset as // first dump bytes data, recording index & write offset as
// we go // we go
final BytesRef spare = new BytesRef(); final BytesRef spare = new BytesRef();

View File

@ -198,7 +198,7 @@ class VarStraightBytesImpl {
if (lastDocID == -1) { if (lastDocID == -1) {
idxOut.writeVLong(0); idxOut.writeVLong(0);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1, final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(0)); PackedInts.bitsRequired(0), PackedInts.DEFAULT);
// docCount+1 so we write sentinel // docCount+1 so we write sentinel
for (int i = 0; i < docCount+1; i++) { for (int i = 0; i < docCount+1; i++) {
w.add(0); w.add(0);
@ -208,7 +208,7 @@ class VarStraightBytesImpl {
fill(docCount, address); fill(docCount, address);
idxOut.writeVLong(address); idxOut.writeVLong(address);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1, final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(address)); PackedInts.bitsRequired(address), PackedInts.DEFAULT);
for (int i = 0; i < docCount; i++) { for (int i = 0; i < docCount; i++) {
w.add(docToAddress[i]); w.add(docToAddress[i]);
} }

View File

@ -25,6 +25,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter; import org.apache.lucene.util.Counter;
import org.apache.lucene.util.packed.PackedInts;
/** /**
* Abstract API for per-document stored primitive values of type <tt>byte[]</tt> * Abstract API for per-document stored primitive values of type <tt>byte[]</tt>
@ -77,14 +78,16 @@ abstract class Writer extends DocValuesConsumer {
* the {@link Directory} to create the files from. * the {@link Directory} to create the files from.
* @param bytesUsed * @param bytesUsed
* a byte-usage tracking reference * a byte-usage tracking reference
* @param fasterButMoreRam Whether the space used for packed ints should be rounded up for higher lookup performance. * @param acceptableOverheadRatio
* Currently this parameter only applies for types {@link Type#BYTES_VAR_SORTED} * how to trade space for speed. This option is only applicable for
* and {@link Type#BYTES_FIXED_SORTED}. * docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @return a new {@link Writer} instance for the given {@link Type} * @return a new {@link Writer} instance for the given {@link Type}
* @throws IOException * @throws IOException
* @see PackedInts#getReader(org.apache.lucene.store.DataInput, float)
*/ */
public static DocValuesConsumer create(Type type, String id, Directory directory, public static DocValuesConsumer create(Type type, String id, Directory directory,
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException { Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
if (comp == null) { if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator(); comp = BytesRef.getUTF8SortedAsUnicodeComparator();
} }
@ -101,22 +104,22 @@ abstract class Writer extends DocValuesConsumer {
return Floats.getWriter(directory, id, bytesUsed, context, type); return Floats.getWriter(directory, id, bytesUsed, context, type);
case BYTES_FIXED_STRAIGHT: case BYTES_FIXED_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp, return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
bytesUsed, context, fasterButMoreRam); bytesUsed, context, acceptableOverheadRatio);
case BYTES_FIXED_DEREF: case BYTES_FIXED_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp, return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
bytesUsed, context, fasterButMoreRam); bytesUsed, context, acceptableOverheadRatio);
case BYTES_FIXED_SORTED: case BYTES_FIXED_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp, return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
bytesUsed, context, fasterButMoreRam); bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_STRAIGHT: case BYTES_VAR_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp, return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
bytesUsed, context, fasterButMoreRam); bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_DEREF: case BYTES_VAR_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp, return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
bytesUsed, context, fasterButMoreRam); bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_SORTED: case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp, return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
bytesUsed, context, fasterButMoreRam); bytesUsed, context, acceptableOverheadRatio);
default: default:
throw new IllegalArgumentException("Unknown Values: " + type); throw new IllegalArgumentException("Unknown Values: " + type);
} }

View File

@ -494,7 +494,7 @@ public interface FieldCache {
* faster lookups (default is "true"). Note that the * faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins", * first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */ * subsequent calls will share the same cache entry. */
public DocTerms getTerms (AtomicReader reader, String field, boolean fasterButMoreRAM) public DocTerms getTerms (AtomicReader reader, String field, float acceptableOverheadRatio)
throws IOException; throws IOException;
/** Returned by {@link #getTermsIndex} */ /** Returned by {@link #getTermsIndex} */
@ -571,7 +571,7 @@ public interface FieldCache {
* faster lookups (default is "true"). Note that the * faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins", * first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */ * subsequent calls will share the same cache entry. */
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, boolean fasterButMoreRAM) public DocTermsIndex getTermsIndex (AtomicReader reader, String field, float acceptableOverheadRatio)
throws IOException; throws IOException;
/** /**

View File

@ -1071,14 +1071,12 @@ class FieldCacheImpl implements FieldCache {
} }
} }
private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true;
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException { public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM); return getTermsIndex(reader, field, PackedInts.FAST);
} }
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException { public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false); return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
} }
static class DocTermsIndexCache extends Cache { static class DocTermsIndexCache extends Cache {
@ -1092,7 +1090,7 @@ class FieldCacheImpl implements FieldCache {
Terms terms = reader.terms(entryKey.field); Terms terms = reader.terms(entryKey.field);
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue(); final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
final PagedBytes bytes = new PagedBytes(15); final PagedBytes bytes = new PagedBytes(15);
@ -1142,8 +1140,8 @@ class FieldCacheImpl implements FieldCache {
startNumUniqueTerms = 1; startNumUniqueTerms = 1;
} }
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM); GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, fasterButMoreRAM); final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
// 0 is reserved for "unset" // 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef()); bytes.copyUsingLengthPrefix(new BytesRef());
@ -1219,11 +1217,11 @@ class FieldCacheImpl implements FieldCache {
// TODO: this if DocTermsIndex was already created, we // TODO: this if DocTermsIndex was already created, we
// should share it... // should share it...
public DocTerms getTerms(AtomicReader reader, String field) throws IOException { public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM); return getTerms(reader, field, PackedInts.FAST);
} }
public DocTerms getTerms(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException { public DocTerms getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false); return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
} }
static final class DocTermsCache extends Cache { static final class DocTermsCache extends Cache {
@ -1237,7 +1235,7 @@ class FieldCacheImpl implements FieldCache {
Terms terms = reader.terms(entryKey.field); Terms terms = reader.terms(entryKey.field);
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue(); final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
final int termCountHardLimit = reader.maxDoc(); final int termCountHardLimit = reader.maxDoc();
@ -1268,7 +1266,7 @@ class FieldCacheImpl implements FieldCache {
startBPV = 1; startBPV = 1;
} }
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM); final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), acceptableOverheadRatio);
// pointer==0 means not set // pointer==0 means not set
bytes.copyUsingLengthPrefix(new BytesRef()); bytes.copyUsingLengthPrefix(new BytesRef());

View File

@ -0,0 +1,54 @@
package org.apache.lucene.util.packed;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.IndexInput;

/**
 * On-disk reader for the Packed64SingleBlock layout: values never span a
 * 64-bit block boundary, so each lookup needs exactly one seek and one
 * {@code readLong}. Every {@link #get(int)} hits the underlying
 * {@link IndexInput} directly; nothing is cached in memory.
 */
final class DirectPacked64SingleBlockReader extends PackedInts.ReaderImpl {

  private final IndexInput in;
  // File position of the first packed block; all seeks are relative to it.
  private final long startPointer;
  // How many values fit in one 64-bit block (64 / bitsPerValue, floor).
  private final int valuesPerBlock;
  // Low-order mask selecting exactly bitsPerValue bits.
  private final long mask;

  DirectPacked64SingleBlockReader(int bitsPerValue, int valueCount,
      IndexInput in) {
    super(valueCount, bitsPerValue);
    this.in = in;
    startPointer = in.getFilePointer();
    valuesPerBlock = 64 / bitsPerValue;
    mask = ~(~0L << bitsPerValue);
  }

  @Override
  public long get(int index) {
    // Locate the 64-bit block holding this value and the value's slot in it.
    final int block = index / valuesPerBlock;
    final int slot = index - block * valuesPerBlock; // same as index % valuesPerBlock
    try {
      // Each block is 8 bytes, hence the << 3 when converting to a byte offset.
      in.seek(startPointer + ((long) block << 3));
      final long bits = in.readLong();
      return (bits >>> (slot * bitsPerValue)) & mask;
    } catch (IOException e) {
      throw new IllegalStateException("failed", e);
    }
  }

}

View File

@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
import java.io.IOException; import java.io.IOException;
/* Reads directly from disk on each get */ /* Reads directly from disk on each get */
final class DirectReader implements PackedInts.Reader { final class DirectPackedReader extends PackedInts.ReaderImpl {
private final IndexInput in; private final IndexInput in;
private final long startPointer; private final long startPointer;
private final int bitsPerValue;
private final int valueCount;
private static final int BLOCK_BITS = Packed64.BLOCK_BITS; private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
private static final int MOD_MASK = Packed64.MOD_MASK; private static final int MOD_MASK = Packed64.MOD_MASK;
@ -34,10 +32,9 @@ final class DirectReader implements PackedInts.Reader {
// masks[n-1] masks for bottom n bits // masks[n-1] masks for bottom n bits
private final long[] masks; private final long[] masks;
public DirectReader(int bitsPerValue, int valueCount, IndexInput in) public DirectPackedReader(int bitsPerValue, int valueCount, IndexInput in)
throws IOException { throws IOException {
this.valueCount = valueCount; super(valueCount, bitsPerValue);
this.bitsPerValue = bitsPerValue;
this.in = in; this.in = in;
long v = 1; long v = 1;
@ -50,26 +47,6 @@ final class DirectReader implements PackedInts.Reader {
startPointer = in.getFilePointer(); startPointer = in.getFilePointer();
} }
@Override
public int getBitsPerValue() {
return bitsPerValue;
}
@Override
public int size() {
return valueCount;
}
@Override
public boolean hasArray() {
return false;
}
@Override
public Object getArray() {
return null;
}
@Override @Override
public long get(int index) { public long get(int index) {
final long majorBitPos = (long)index * bitsPerValue; final long majorBitPos = (long)index * bitsPerValue;

View File

@ -28,22 +28,14 @@ public class GrowableWriter implements PackedInts.Mutable {
private long currentMaxValue; private long currentMaxValue;
private PackedInts.Mutable current; private PackedInts.Mutable current;
private final boolean roundFixedSize; private final float acceptableOverheadRatio;
public GrowableWriter(int startBitsPerValue, int valueCount, boolean roundFixedSize) { public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
this.roundFixedSize = roundFixedSize; this.acceptableOverheadRatio = acceptableOverheadRatio;
current = PackedInts.getMutable(valueCount, getSize(startBitsPerValue)); current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
currentMaxValue = PackedInts.maxValue(current.getBitsPerValue()); currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
} }
private final int getSize(int bpv) {
if (roundFixedSize) {
return PackedInts.getNextFixedSize(bpv);
} else {
return bpv;
}
}
public long get(int index) { public long get(int index) {
return current.get(index); return current.get(index);
} }
@ -78,7 +70,7 @@ public class GrowableWriter implements PackedInts.Mutable {
currentMaxValue *= 2; currentMaxValue *= 2;
} }
final int valueCount = size(); final int valueCount = size();
PackedInts.Mutable next = PackedInts.getMutable(valueCount, getSize(bpv)); PackedInts.Mutable next = PackedInts.getMutable(valueCount, bpv, acceptableOverheadRatio);
for(int i=0;i<valueCount;i++) { for(int i=0;i<valueCount;i++) {
next.set(i, current.get(i)); next.set(i, current.get(i));
} }
@ -93,11 +85,12 @@ public class GrowableWriter implements PackedInts.Mutable {
} }
public GrowableWriter resize(int newSize) { public GrowableWriter resize(int newSize) {
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, roundFixedSize); GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
final int limit = Math.min(size(), newSize); final int limit = Math.min(size(), newSize);
for(int i=0;i<limit;i++) { for(int i=0;i<limit;i++) {
next.set(i, get(i)); next.set(i, get(i));
} }
return next; return next;
} }
} }

View File

@ -0,0 +1,85 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** 48 bitsPerValue backed by short[] */
/** 48 bitsPerValue backed by short[]: each value is stored in three
 *  consecutive shorts (high, mid, low 16 bits). */
final class Packed16ThreeBlocks extends PackedInts.ReaderImpl
    implements PackedInts.Mutable {

  // Three shorts per value bounds the addressable value count.
  public static final int MAX_SIZE = Integer.MAX_VALUE / 3;

  private final short[] blocks;

  Packed16ThreeBlocks(int valueCount) {
    super(valueCount, 48);
    if (valueCount > MAX_SIZE) {
      throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
    }
    blocks = new short[valueCount * 3];
  }

  /** Reads the packed shorts from {@code in}, then consumes the shorts that
   *  pad the serialized form out to a whole number of longs. */
  Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
    this(valueCount);
    for (int i = 0; i < blocks.length; ++i) {
      blocks[i] = in.readShort();
    }
    final int remainder = blocks.length % 4;
    if (remainder != 0) {
      // round out long
      for (int i = remainder; i < 4; ++i) {
        in.readShort();
      }
    }
  }

  @Override
  public long get(int index) {
    final int base = index * 3;
    long value = (blocks[base] & 0xffffL) << 32;
    value |= (blocks[base + 1] & 0xffffL) << 16;
    value |= blocks[base + 2] & 0xffffL;
    return value;
  }

  @Override
  public void set(int index, long value) {
    final int base = index * 3;
    blocks[base] = (short) (value >> 32);
    blocks[base + 1] = (short) (value >> 16);
    blocks[base + 2] = (short) value;
  }

  @Override
  public void clear() {
    Arrays.fill(blocks, (short) 0);
  }

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
  }
}

View File

@ -1,227 +0,0 @@
package org.apache.lucene.util.packed;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Space optimized random access capable array of values with a fixed number of
* bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher
* numbers.
* </p><p>
* The implementation strives to avoid conditionals and expensive operations,
* sacrificing code clarity to achieve better performance.
*/
class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
  static final int BLOCK_SIZE = 32; // 32 = int, 64 = long
  static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE
  static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE

  // Lookup tables are sized BLOCK_SIZE+1 so bitsPerValue can index directly.
  private static final int ENTRY_SIZE = BLOCK_SIZE + 1;
  // Three table entries (two shifts + one extra shift) per bit position.
  private static final int FAC_BITPOS = 3;

  /*
   * In order to make an efficient value-getter, conditionals should be
   * avoided. A value can be positioned inside of a block, requiring shifting
   * left or right or it can span two blocks, requiring a left-shift on the
   * first block and a right-shift on the right block.
   * </p><p>
   * By always shifting the first block both left and right, we get exactly
   * the right bits. By always shifting the second block right and applying
   * a mask, we get the right bits there. After that, we | the two bitsets.
   */
  private static final int[][] SHIFTS =
      new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
  private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE];

  static { // Generate shifts
    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
        int[] currentShifts = SHIFTS[elementBits];
        int base = bitPos * FAC_BITPOS;
        currentShifts[base    ] = bitPos;
        currentShifts[base + 1] = BLOCK_SIZE - elementBits;
        if (bitPos <= BLOCK_SIZE - elementBits) { // Single block
          currentShifts[base + 2] = 0;
          MASKS[elementBits][bitPos] = 0;
        } else { // Two blocks
          int rBits = elementBits - (BLOCK_SIZE - bitPos);
          currentShifts[base + 2] = BLOCK_SIZE - rBits;
          MASKS[elementBits][bitPos] = ~(~0 << rBits);
        }
      }
    }
  }

  /*
   * The setter requires more masking than the getter.
   */
  private static final int[][] WRITE_MASKS =
      new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
  static {
    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
      int elementPosMask = ~(~0 << elementBits);
      int[] currentShifts = SHIFTS[elementBits];
      int[] currentMasks = WRITE_MASKS[elementBits];
      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
        int base = bitPos * FAC_BITPOS;
        // Mask that clears the target bits in the first block.
        currentMasks[base    ] =~((elementPosMask
            << currentShifts[base + 1])
            >>> currentShifts[base]);
        if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used
          currentMasks[base+1] = ~0; // Keep all bits
          currentMasks[base+2] = 0;  // Or with 0
        } else {
          currentMasks[base+1] = ~(elementPosMask
              << currentShifts[base + 2]);
          currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0;
        }
      }
    }
  }

  /* The bits */
  private int[] blocks;

  // Cached calculations
  private int maxPos;      // blocks.length * BLOCK_SIZE / bitsPerValue - 1
  private int[] shifts;    // The shifts for the current bitsPerValue
  private int[] readMasks;
  private int[] writeMasks;

  /**
   * Creates an array with the internal structures adjusted for the given
   * limits and initialized to 0.
   * @param valueCount   the number of elements.
   * @param bitsPerValue the number of bits available for any given value.
   *        Note: bitsPerValue &gt;32 is not supported by this implementation.
   */
  public Packed32(int valueCount, int bitsPerValue) {
    // +2 extra ints so get/set may read one block past the last value
    // without bounds checks (see the "non-conditional tricks" comment).
    this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)],
        valueCount, bitsPerValue);
  }

  /**
   * Creates an array with content retrieved from the given DataInput.
   * @param in           a DataInput, positioned at the start of Packed64-content.
   * @param valueCount   the number of elements.
   * @param bitsPerValue the number of bits available for any given value.
   * @throws java.io.IOException if the values for the backing array could not
   *                             be retrieved.
   */
  public Packed32(DataInput in, int valueCount, int bitsPerValue)
      throws IOException {
    super(valueCount, bitsPerValue);
    int size = size(bitsPerValue, valueCount);
    blocks = new int[size + 1]; // +1 due to non-conditional tricks
    // TODO: find a faster way to bulk-read ints...
    for(int i = 0 ; i < size ; i++) {
      blocks[i] = in.readInt();
    }
    if (size % 2 == 1) {
      in.readInt(); // Align to long
    }
    updateCached();
  }

  /** Number of int blocks needed to hold valueCount values of bitsPerValue
   *  bits each (rounded up). */
  private static int size(int bitsPerValue, int valueCount) {
    final long totBitCount = (long) valueCount * bitsPerValue;
    return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1));
  }

  /**
   * Creates an array backed by the given blocks.
   * </p><p>
   * Note: The blocks are used directly, so changes to the given block will
   * affect the Packed32-structure.
   * @param blocks       used as the internal backing array.
   * @param valueCount   the number of values.
   * @param bitsPerValue the number of bits available for any given value.
   *        Note: bitsPerValue &gt;32 is not supported by this implementation.
   */
  public Packed32(int[] blocks, int valueCount, int bitsPerValue) {
    // TODO: Check that blocks.length is sufficient for holding length values
    super(valueCount, bitsPerValue);
    if (bitsPerValue > 31) {
      throw new IllegalArgumentException(String.format(
          "This array only supports values of 31 bits or less. The "
              + "required number of bits was %d. The Packed64 "
              + "implementation allows values with more than 31 bits",
          bitsPerValue));
    }
    this.blocks = blocks;
    updateCached();
  }

  // Re-selects the precomputed shift/mask rows for the current bitsPerValue.
  private void updateCached() {
    readMasks = MASKS[bitsPerValue];
    maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2);
    shifts = SHIFTS[bitsPerValue];
    writeMasks = WRITE_MASKS[bitsPerValue];
  }

  /**
   * @param index the position of the value.
   * @return the value at the given index.
   */
  public long get(final int index) {
    assert index >= 0 && index < size();
    final long majorBitPos = (long)index * bitsPerValue;
    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
    final int bitPos =     (int)(majorBitPos & MOD_MASK);     // % BLOCK_SIZE);
    final int base = bitPos * FAC_BITPOS;
    // Always combine the two candidate blocks; the table entries make the
    // second term zero whenever the value fits entirely in one block.
    return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
        ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
  }

  public void set(final int index, final long value) {
    final int intValue = (int)value;
    final long majorBitPos = (long)index * bitsPerValue;
    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
    final int bitPos =     (int)(majorBitPos & MOD_MASK);     // % BLOCK_SIZE);
    final int base = bitPos * FAC_BITPOS;
    // Clear then OR the target bits in both candidate blocks; the write
    // masks leave the second block untouched when the value is unsplit.
    blocks[elementPos  ] = (blocks[elementPos  ] & writeMasks[base])
        | (intValue << shifts[base + 1] >>> shifts[base]);
    blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1])
        | ((intValue << shifts[base + 2])
        & writeMasks[base+2]);
  }

  public void clear() {
    Arrays.fill(blocks, 0);
  }

  @Override
  public String toString() {
    return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos
        + ", elements.length=" + blocks.length + ")";
  }

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }
}

View File

@ -0,0 +1,365 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
* This class is similar to {@link Packed64} except that it trades space for
* speed by ensuring that a single block needs to be read/written in order to
* read/write a value.
*/
abstract class Packed64SingleBlock extends PackedInts.ReaderImpl
    implements PackedInts.Mutable {

  // Only bit widths for which a specialized subclass exists below.
  private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {1, 2, 3, 4,
      5, 6, 7, 9, 10, 12, 21};

  // Indexed by bitsPerValue (max supported is 21, so 22 rows); each row holds
  // one entry per value slot inside a 64-bit block.
  private static final long[][] WRITE_MASKS = new long[22][];
  private static final int[][] SHIFTS = new int[22][];
  static {
    for (int bpv : SUPPORTED_BITS_PER_VALUE) {
      initMasks(bpv);
    }
  }

  /** Precomputes, for every slot of a block, the left-shift that positions a
   *  value and the mask that clears that slot before writing. */
  protected static void initMasks(int bpv) {
    int valuesPerBlock = Long.SIZE / bpv;
    long[] writeMasks = new long[valuesPerBlock];
    int[] shifts = new int[valuesPerBlock];
    long bits = (1L << bpv) - 1;
    for (int i = 0; i < valuesPerBlock; ++i) {
      shifts[i] = bpv * i;
      writeMasks[i] = ~(bits << shifts[i]);
    }
    WRITE_MASKS[bpv] = writeMasks;
    SHIFTS[bpv] = shifts;
  }

  /** Returns the specialized implementation for the given bit width.
   *  @throws IllegalArgumentException if bitsPerValue is unsupported
   *          (see {@link #isSupported}). */
  public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {
    switch (bitsPerValue) {
      case 1:
        return new Packed64SingleBlock1(valueCount);
      case 2:
        return new Packed64SingleBlock2(valueCount);
      case 3:
        return new Packed64SingleBlock3(valueCount);
      case 4:
        return new Packed64SingleBlock4(valueCount);
      case 5:
        return new Packed64SingleBlock5(valueCount);
      case 6:
        return new Packed64SingleBlock6(valueCount);
      case 7:
        return new Packed64SingleBlock7(valueCount);
      case 9:
        return new Packed64SingleBlock9(valueCount);
      case 10:
        return new Packed64SingleBlock10(valueCount);
      case 12:
        return new Packed64SingleBlock12(valueCount);
      case 21:
        return new Packed64SingleBlock21(valueCount);
      default:
        throw new IllegalArgumentException("Unsupported bitsPerValue: "
            + bitsPerValue);
    }
  }

  /** Creates a reader and fills its blocks from {@code in} (one long per
   *  block, exactly blocks.length longs). */
  public static Packed64SingleBlock create(DataInput in,
      int valueCount, int bitsPerValue) throws IOException {
    Packed64SingleBlock reader = create(valueCount, bitsPerValue);
    for (int i = 0; i < reader.blocks.length; ++i) {
      reader.blocks[i] = in.readLong();
    }
    return reader;
  }

  public static boolean isSupported(int bitsPerValue) {
    return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
  }

  /** Wasted bits per stored value: the block's unused remainder spread over
   *  the values that share the block. */
  public static float overheadPerValue(int bitsPerValue) {
    int valuesPerBlock = 64 / bitsPerValue;
    int overhead = 64 % bitsPerValue;
    return (float) overhead / valuesPerBlock;
  }

  protected final long[] blocks;
  protected final int valuesPerBlock;
  protected final int[] shifts;       // per-slot left shifts for this width
  protected final long[] writeMasks;  // per-slot clearing masks for this width
  protected final long readMask;      // low bitsPerValue bits set

  Packed64SingleBlock(int valueCount, int bitsPerValue) {
    super(valueCount, bitsPerValue);
    valuesPerBlock = Long.SIZE / bitsPerValue;
    blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
    shifts = SHIFTS[bitsPerValue];
    writeMasks = WRITE_MASKS[bitsPerValue];
    readMask = ~writeMasks[0];
  }

  /** Number of blocks needed to hold valueCount values (rounded up). */
  private static int requiredCapacity(int valueCount, int valuesPerBlock) {
    return valueCount / valuesPerBlock
        + (valueCount % valuesPerBlock == 0 ? 0 : 1);
  }

  protected int blockOffset(int offset) {
    return offset / valuesPerBlock;
  }

  protected int offsetInBlock(int offset) {
    return offset % valuesPerBlock;
  }

  @Override
  public long get(int index) {
    final int o = blockOffset(index);
    final int b = offsetInBlock(index);
    return (blocks[o] >> shifts[b]) & readMask;
  }

  @Override
  public void set(int index, long value) {
    final int o = blockOffset(index);
    final int b = offsetInBlock(index);
    // Clear the slot, then OR in the new value at the slot's shift.
    blocks[o] = (blocks[o] & writeMasks[b]) | (value << shifts[b]);
  }

  @Override
  public void clear() {
    Arrays.fill(blocks, 0L);
  }

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
  }

  // Specialisations that allow the JVM to optimize computation of the block
  // offset as well as the offset in block

  static final class Packed64SingleBlock21 extends Packed64SingleBlock {

    Packed64SingleBlock21(int valueCount) {
      super(valueCount, 21);
      assert valuesPerBlock == 3;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 3;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 3;
    }
  }

  static final class Packed64SingleBlock12 extends Packed64SingleBlock {

    Packed64SingleBlock12(int valueCount) {
      super(valueCount, 12);
      assert valuesPerBlock == 5;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 5;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 5;
    }
  }

  static final class Packed64SingleBlock10 extends Packed64SingleBlock {

    Packed64SingleBlock10(int valueCount) {
      super(valueCount, 10);
      assert valuesPerBlock == 6;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 6;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 6;
    }
  }

  static final class Packed64SingleBlock9 extends Packed64SingleBlock {

    Packed64SingleBlock9(int valueCount) {
      super(valueCount, 9);
      assert valuesPerBlock == 7;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 7;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 7;
    }
  }

  static final class Packed64SingleBlock7 extends Packed64SingleBlock {

    Packed64SingleBlock7(int valueCount) {
      super(valueCount, 7);
      assert valuesPerBlock == 9;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 9;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 9;
    }
  }

  static final class Packed64SingleBlock6 extends Packed64SingleBlock {

    Packed64SingleBlock6(int valueCount) {
      super(valueCount, 6);
      assert valuesPerBlock == 10;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 10;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 10;
    }
  }

  static final class Packed64SingleBlock5 extends Packed64SingleBlock {

    Packed64SingleBlock5(int valueCount) {
      super(valueCount, 5);
      assert valuesPerBlock == 12;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 12;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 12;
    }
  }

  // For power-of-two widths, division/modulo reduce to shift/mask.

  static final class Packed64SingleBlock4 extends Packed64SingleBlock {

    Packed64SingleBlock4(int valueCount) {
      super(valueCount, 4);
      assert valuesPerBlock == 16;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset >> 4;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset & 15;
    }
  }

  static final class Packed64SingleBlock3 extends Packed64SingleBlock {

    Packed64SingleBlock3(int valueCount) {
      super(valueCount, 3);
      assert valuesPerBlock == 21;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset / 21;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset % 21;
    }
  }

  static final class Packed64SingleBlock2 extends Packed64SingleBlock {

    Packed64SingleBlock2(int valueCount) {
      super(valueCount, 2);
      assert valuesPerBlock == 32;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset >> 5;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset & 31;
    }
  }

  static final class Packed64SingleBlock1 extends Packed64SingleBlock {

    Packed64SingleBlock1(int valueCount) {
      super(valueCount, 1);
      assert valuesPerBlock == 64;
    }

    @Override
    protected int blockOffset(int offset) {
      return offset >> 6;
    }

    @Override
    protected int offsetInBlock(int offset) {
      return offset & 63;
    }
  }
}

View File

@ -0,0 +1,88 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {

  // Last 64-bit block read from the stream; values are unpacked from it.
  private long pending;
  // Bit offset of the next value inside pending; 64 forces a readLong().
  private int shift;
  // Selects the low bitsPerValue bits of a shifted block.
  private final long mask;
  // Ordinal of the last value returned; -1 before the first next().
  private int position;

  Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
      throws IOException {
    super(valueCount, bitsPerValue, in);
    pending = 0;
    shift = 64;
    mask = ~(~0L << bitsPerValue);
    position = -1;
  }

  @Override
  public long next() throws IOException {
    // A value never spans two blocks in this format; if the next one does
    // not fit in the remaining bits, the rest of the block is padding.
    if (shift + bitsPerValue > 64) {
      pending = in.readLong();
      shift = 0;
    }
    final long next = (pending >>> shift) & mask;
    shift += bitsPerValue;
    ++position;
    return next;
  }

  @Override
  public int ord() {
    return position;
  }

  @Override
  public long advance(int ord) throws IOException {
    assert ord < valueCount : "ord must be less than valueCount";
    assert ord > position : "ord must be greater than the current position";
    final int valuesPerBlock = 64 / bitsPerValue;
    // Index of the block next() would read next (the block after the one
    // holding 'position'), and of the block holding 'ord'.
    final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
    final long targetBlock = ord / valuesPerBlock;
    final long blocksToSkip = targetBlock - nextBlock;
    if (blocksToSkip > 0) {
      // Seek over whole 8-byte blocks, then decode up to the slot before
      // 'ord' inside the target block.
      final long skip = blocksToSkip << 3;
      final long filePointer = in.getFilePointer();
      in.seek(filePointer + skip);
      shift = 64;
      final int offsetInBlock = ord % valuesPerBlock;
      for (int i = 0; i < offsetInBlock; ++i) {
        next();
      }
    } else {
      // Target is in the current or next block: just step forward.
      for (int i = position; i < ord - 1; ++i) {
        next();
      }
    }
    position = ord - 1;
    return next();
  }
}

View File

@ -0,0 +1,81 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts.Writer;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link Writer} for {@link Packed64SingleBlock} readers.
*/
/**
 * A {@link Writer} for {@link Packed64SingleBlock} readers: packs values into
 * 64-bit blocks so that no value ever crosses a block boundary, flushing each
 * block as a single long.
 */
final class Packed64SingleBlockWriter extends Writer {

  private long pending;  // block currently being filled
  private int shift;     // bit offset of the next value inside pending
  private int written;   // number of values added so far

  Packed64SingleBlockWriter(DataOutput out, int valueCount,
      int bitsPerValue) throws IOException {
    super(out, valueCount, bitsPerValue);
    assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
    pending = 0;
    shift = 0;
    written = 0;
  }

  @Override
  protected int getFormat() {
    return PackedInts.PACKED_SINGLE_BLOCK;
  }

  @Override
  public void add(long v) throws IOException {
    assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
        + " maxValue=" + PackedInts.maxValue(bitsPerValue);
    assert v >= 0;
    if (shift + bitsPerValue > Long.SIZE) {
      // The value would not fit: the remaining bits are padding.
      flushPending();
    }
    pending |= v << shift;
    shift += bitsPerValue;
    ++written;
  }

  // Writes the current block and starts a fresh one.
  private void flushPending() throws IOException {
    out.writeLong(pending);
    pending = 0;
    shift = 0;
  }

  @Override
  public void finish() throws IOException {
    // Pad out to valueCount with zeros so every block is complete.
    for (int i = written; i < valueCount; ++i) {
      add(0L); // Auto flush
    }
    if (shift > 0) {
      // add was called at least once
      out.writeLong(pending);
    }
  }

  @Override
  public String toString() {
    return "Packed64SingleBlockWriter(written " + written + "/" + valueCount + " with "
        + bitsPerValue + " bits/value)";
  }
}

View File

@ -0,0 +1,86 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** 24 bitsPerValue backed by byte[] */
/** 24 bitsPerValue backed by byte[]: each value is stored in three
 *  consecutive bytes (high, mid, low 8 bits). */
final class Packed8ThreeBlocks extends PackedInts.ReaderImpl
    implements PackedInts.Mutable {

  // Three bytes per value bounds the addressable value count.
  public static final int MAX_SIZE = Integer.MAX_VALUE / 3;

  private final byte[] blocks;

  Packed8ThreeBlocks(int valueCount) {
    super(valueCount, 24);
    if (valueCount > MAX_SIZE) {
      throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
    }
    blocks = new byte[valueCount * 3];
  }

  /** Reads the packed bytes from {@code in}, then consumes the bytes that
   *  pad the serialized form out to a whole number of longs. */
  Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
    this(valueCount);
    for (int i = 0; i < blocks.length; ++i) {
      blocks[i] = in.readByte();
    }
    final int remainder = blocks.length % 8;
    if (remainder != 0) {
      // round out long
      for (int i = remainder; i < 8; ++i) {
        in.readByte();
      }
    }
  }

  @Override
  public long get(int index) {
    final int base = index * 3;
    long value = (blocks[base] & 0xffL) << 16;
    value |= (blocks[base + 1] & 0xffL) << 8;
    value |= blocks[base + 2] & 0xffL;
    return value;
  }

  @Override
  public void set(int index, long value) {
    final int base = index * 3;
    blocks[base] = (byte) (value >> 16);
    blocks[base + 1] = (byte) (value >> 8);
    blocks[base + 2] = (byte) value;
  }

  @Override
  public void clear() {
    Arrays.fill(blocks, (byte) 0);
  }

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
  }
}

View File

@ -23,7 +23,6 @@ import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Constants;
import java.io.IOException; import java.io.IOException;
@ -38,10 +37,33 @@ import java.io.IOException;
public class PackedInts { public class PackedInts {
/**
* At most 700% memory overhead, always select a direct implementation.
*/
public static final float FASTEST = 7f;
/**
* At most 50% memory overhead, always select a reasonably fast implementation.
*/
public static final float FAST = 0.5f;
/**
* At most 20% memory overhead.
*/
public static final float DEFAULT = 0.2f;
/**
* No memory overhead at all, but the returned implementation may be slow.
*/
public static final float COMPACT = 0f;
private final static String CODEC_NAME = "PackedInts"; private final static String CODEC_NAME = "PackedInts";
private final static int VERSION_START = 0; private final static int VERSION_START = 0;
private final static int VERSION_CURRENT = VERSION_START; private final static int VERSION_CURRENT = VERSION_START;
static final int PACKED = 0;
static final int PACKED_SINGLE_BLOCK = 1;
/** /**
* A read-only random access array of positive integers. * A read-only random access array of positive integers.
* @lucene.internal * @lucene.internal
@ -104,6 +126,34 @@ public class PackedInts {
long advance(int ord) throws IOException; long advance(int ord) throws IOException;
} }
static abstract class ReaderIteratorImpl implements ReaderIterator {
protected final IndexInput in;
protected final int bitsPerValue;
protected final int valueCount;
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
this.in = in;
this.bitsPerValue = bitsPerValue;
this.valueCount = valueCount;
}
@Override
public int getBitsPerValue() {
return bitsPerValue;
}
@Override
public int size() {
return valueCount;
}
@Override
public void close() throws IOException {
in.close();
}
}
/** /**
* A packed integer array that can be modified. * A packed integer array that can be modified.
* @lucene.internal * @lucene.internal
@ -119,7 +169,6 @@ public class PackedInts {
/** /**
* Sets all values to 0. * Sets all values to 0.
*/ */
void clear(); void clear();
} }
@ -145,10 +194,6 @@ public class PackedInts {
return valueCount; return valueCount;
} }
public long getMaxValue() { // Convenience method
return maxValue(bitsPerValue);
}
public Object getArray() { public Object getArray() {
return null; return null;
} }
@ -176,8 +221,10 @@ public class PackedInts {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(bitsPerValue); out.writeVInt(bitsPerValue);
out.writeVInt(valueCount); out.writeVInt(valueCount);
out.writeVInt(getFormat());
} }
protected abstract int getFormat();
public abstract void add(long v) throws IOException; public abstract void add(long v) throws IOException;
public abstract void finish() throws IOException; public abstract void finish() throws IOException;
} }
@ -185,6 +232,7 @@ public class PackedInts {
/** /**
* Retrieve PackedInt data from the DataInput and return a packed int * Retrieve PackedInt data from the DataInput and return a packed int
* structure based on it. * structure based on it.
*
* @param in positioned at the beginning of a stored packed int structure. * @param in positioned at the beginning of a stored packed int structure.
* @return a read only random access capable array of positive integers. * @return a read only random access capable array of positive integers.
* @throws IOException if the structure could not be retrieved. * @throws IOException if the structure could not be retrieved.
@ -195,22 +243,30 @@ public class PackedInts {
final int bitsPerValue = in.readVInt(); final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt(); final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) {
case PACKED:
switch (bitsPerValue) { switch (bitsPerValue) {
case 8: case 8:
return new Direct8(in, valueCount); return new Direct8(in, valueCount);
case 16: case 16:
return new Direct16(in, valueCount); return new Direct16(in, valueCount);
case 24:
return new Packed8ThreeBlocks(in, valueCount);
case 32: case 32:
return new Direct32(in, valueCount); return new Direct32(in, valueCount);
case 48:
return new Packed16ThreeBlocks(in, valueCount);
case 64: case 64:
return new Direct64(in, valueCount); return new Direct64(in, valueCount);
default: default:
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
return new Packed64(in, valueCount, bitsPerValue); return new Packed64(in, valueCount, bitsPerValue);
} else {
return new Packed32(in, valueCount, bitsPerValue);
} }
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknwown Writer format: " + format);
} }
} }
@ -226,7 +282,15 @@ public class PackedInts {
final int bitsPerValue = in.readVInt(); final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt(); final int valueCount = in.readVInt();
return new PackedReaderIterator(bitsPerValue, valueCount, in); final int format = in.readVInt();
switch (format) {
case PACKED:
return new PackedReaderIterator(valueCount, bitsPerValue, in);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
default:
throw new AssertionError("Unknwown Writer format: " + format);
}
} }
/** /**
@ -243,55 +307,126 @@ public class PackedInts {
final int bitsPerValue = in.readVInt(); final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt(); final int valueCount = in.readVInt();
return new DirectReader(bitsPerValue, valueCount, in); final int format = in.readVInt();
switch (format) {
case PACKED:
return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
throw new AssertionError("Unknwown Writer format: " + format);
}
} }
/** /**
* Create a packed integer array with the given amount of values initialized * Create a packed integer array with the given amount of values initialized
* to 0. the valueCount and the bitsPerValue cannot be changed after creation. * to 0. the valueCount and the bitsPerValue cannot be changed after creation.
* All Mutables known by this factory are kept fully in RAM. * All Mutables known by this factory are kept fully in RAM.
* @param valueCount the number of elements. *
* @param bitsPerValue the number of bits available for any given value. * Positive values of <code>acceptableOverheadRatio</code> will trade space
* @return a mutable packed integer array. * for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
* that the fastest implementation is selected.
*
* @param valueCount the number of elements
* @param bitsPerValue the number of bits available for any given value
* @param acceptableOverheadRatio an acceptable overhead
* ratio per value
* @return a mutable packed integer array
* @throws java.io.IOException if the Mutable could not be created. With the * @throws java.io.IOException if the Mutable could not be created. With the
* current implementations, this never happens, but the method * current implementations, this never happens, but the method
* signature allows for future persistence-backed Mutables. * signature allows for future persistence-backed Mutables.
* @lucene.internal * @lucene.internal
*/ */
public static Mutable getMutable( public static Mutable getMutable(int valueCount,
int valueCount, int bitsPerValue) { int bitsPerValue, float acceptableOverheadRatio) {
switch (bitsPerValue) { acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
case 8: acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return new Direct8(valueCount); return new Direct8(valueCount);
case 16: } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return new Direct16(valueCount); return new Direct16(valueCount);
case 32: } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return new Direct32(valueCount); return new Direct32(valueCount);
case 64: } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return new Direct64(valueCount); return new Direct64(valueCount);
default: } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) { return new Packed8ThreeBlocks(valueCount);
return new Packed64(valueCount, bitsPerValue); } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return new Packed16ThreeBlocks(valueCount);
} else { } else {
return new Packed32(valueCount, bitsPerValue); for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return Packed64SingleBlock.create(valueCount, bpv);
} }
} }
} }
return new Packed64(valueCount, bitsPerValue);
}
}
/** /**
* Create a packed integer array writer for the given number of values at the * Create a packed integer array writer for the given number of values at the
* given bits/value. Writers append to the given IndexOutput and has very * given bits/value. Writers append to the given IndexOutput and has very
* low memory overhead. * low memory overhead.
*
* Positive values of <code>acceptableOverheadRatio</code> will trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
* that the fastest implementation is selected.
*
* @param out the destination for the produced bits. * @param out the destination for the produced bits.
* @param valueCount the number of elements. * @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value. * @param bitsPerValue the number of bits available for any given value.
* @param acceptableOverheadRatio an acceptable overhead ratio per value
* @return a Writer ready for receiving values. * @return a Writer ready for receiving values.
* @throws IOException if bits could not be written to out. * @throws IOException if bits could not be written to out.
* @lucene.internal * @lucene.internal
*/ */
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue) public static Writer getWriter(DataOutput out,
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
throws IOException { throws IOException {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return new PackedWriter(out, valueCount, 8);
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return new PackedWriter(out, valueCount, 16);
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return new PackedWriter(out, valueCount, 32);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return new PackedWriter(out, valueCount, 64);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return new PackedWriter(out, valueCount, 24);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return new PackedWriter(out, valueCount, bitsPerValue); return new PackedWriter(out, valueCount, bitsPerValue);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return new Packed64SingleBlockWriter(out, valueCount, bpv);
}
}
}
return new PackedWriter(out, valueCount, bitsPerValue);
}
} }
/** Returns how many bits are required to hold values up /** Returns how many bits are required to hold values up
@ -301,14 +436,10 @@ public class PackedInts {
* @lucene.internal * @lucene.internal
*/ */
public static int bitsRequired(long maxValue) { public static int bitsRequired(long maxValue) {
// Very high long values does not translate well to double, so we do an if (maxValue < 0) {
// explicit check for the edge cases throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
if (maxValue > 0x3FFFFFFFFFFFFFFFL) {
return 63;
} if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
return 62;
} }
return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0))); return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
} }
/** /**
@ -321,26 +452,4 @@ public class PackedInts {
public static long maxValue(int bitsPerValue) { public static long maxValue(int bitsPerValue) {
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue); return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
} }
/** Rounds bitsPerValue up to 8, 16, 32 or 64. */
public static int getNextFixedSize(int bitsPerValue) {
if (bitsPerValue <= 8) {
return 8;
} else if (bitsPerValue <= 16) {
return 16;
} else if (bitsPerValue <= 32) {
return 32;
} else {
return 64;
}
}
/** Possibly wastes some storage in exchange for faster lookups */
public static int getRoundedFixedSize(int bitsPerValue) {
if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
return getNextFixedSize(bitsPerValue);
} else {
return bitsPerValue;
}
}
} }

View File

@ -21,24 +21,18 @@ import org.apache.lucene.store.IndexInput;
import java.io.IOException; import java.io.IOException;
final class PackedReaderIterator implements PackedInts.ReaderIterator { final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
private long pending; private long pending;
private int pendingBitsLeft; private int pendingBitsLeft;
private final IndexInput in;
private final int bitsPerValue;
private final int valueCount;
private int position = -1; private int position = -1;
// masks[n-1] masks for bottom n bits // masks[n-1] masks for bottom n bits
private final long[] masks; private final long[] masks;
public PackedReaderIterator(int bitsPerValue, int valueCount, IndexInput in) public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
throws IOException { throws IOException {
super(valueCount, bitsPerValue, in);
this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue;
this.in = in;
masks = new long[bitsPerValue]; masks = new long[bitsPerValue];
long v = 1; long v = 1;
@ -48,14 +42,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
} }
} }
public int getBitsPerValue() {
return bitsPerValue;
}
public int size() {
return valueCount;
}
public long next() throws IOException { public long next() throws IOException {
if (pendingBitsLeft == 0) { if (pendingBitsLeft == 0) {
pending = in.readLong(); pending = in.readLong();
@ -79,10 +65,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
return result; return result;
} }
public void close() throws IOException {
in.close();
}
public int ord() { public int ord() {
return position; return position;
} }

View File

@ -52,6 +52,11 @@ class PackedWriter extends PackedInts.Writer {
} }
} }
@Override
protected int getFormat() {
return PackedInts.PACKED;
}
/** /**
* Do not call this after finish * Do not call this after finish
*/ */

View File

@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
// TODO: some of this should be under lucene40 codec tests? is talking to codec directly?f // TODO: some of this should be under lucene40 codec tests? is talking to codec directly?f
public class TestDocValues extends LuceneTestCase { public class TestDocValues extends LuceneTestCase {
@ -71,7 +72,7 @@ public class TestDocValues extends LuceneTestCase {
Directory dir = newDirectory(); Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter(); final Counter trackBytes = Counter.newCounter();
DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random()), DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random()),
random().nextBoolean()); random().nextFloat() * PackedInts.FAST);
int maxDoc = 220; int maxDoc = 220;
final String[] values = new String[maxDoc]; final String[] values = new String[maxDoc];
final int fixedLength = 1 + atLeast(50); final int fixedLength = 1 + atLeast(50);

View File

@ -64,6 +64,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
public class TestIndexWriter extends LuceneTestCase { public class TestIndexWriter extends LuceneTestCase {
@ -1677,7 +1678,7 @@ public class TestIndexWriter extends LuceneTestCase {
w.close(); w.close();
assertEquals(1, reader.docFreq(new Term("content", bigTerm))); assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextBoolean()); FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
assertEquals(5, dti.numOrd()); // +1 for null ord assertEquals(5, dti.numOrd()); // +1 for null ord
assertEquals(4, dti.size()); assertEquals(4, dti.size());
assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef())); assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef()));

View File

@ -19,6 +19,7 @@ package org.apache.lucene.util.packed;
import org.apache.lucene.store.*; import org.apache.lucene.store.*;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.packed.PackedInts.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -56,7 +57,7 @@ public class TestPackedInts extends LuceneTestCase {
IndexOutput out = d.createOutput("out.bin", newIOContext(random())); IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
PackedInts.Writer w = PackedInts.getWriter( PackedInts.Writer w = PackedInts.getWriter(
out, valueCount, nbits); out, valueCount, nbits, random().nextFloat()*PackedInts.FASTEST);
final long[] values = new long[valueCount]; final long[] values = new long[valueCount];
for(int i=0;i<valueCount;i++) { for(int i=0;i<valueCount;i++) {
@ -188,16 +189,24 @@ public class TestPackedInts extends LuceneTestCase {
if (bitsPerValue <= 16) { if (bitsPerValue <= 16) {
packedInts.add(new Direct16(valueCount)); packedInts.add(new Direct16(valueCount));
} }
if (bitsPerValue <= 31) { if (bitsPerValue <= 24 && valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
packedInts.add(new Packed32(valueCount, bitsPerValue)); packedInts.add(new Packed8ThreeBlocks(valueCount));
} }
if (bitsPerValue <= 32) { if (bitsPerValue <= 32) {
packedInts.add(new Direct32(valueCount)); packedInts.add(new Direct32(valueCount));
} }
if (bitsPerValue <= 48 && valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
packedInts.add(new Packed16ThreeBlocks(valueCount));
}
if (bitsPerValue <= 63) { if (bitsPerValue <= 63) {
packedInts.add(new Packed64(valueCount, bitsPerValue)); packedInts.add(new Packed64(valueCount, bitsPerValue));
} }
packedInts.add(new Direct64(valueCount)); packedInts.add(new Direct64(valueCount));
for (int bpv = bitsPerValue; bpv <= 64; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
packedInts.add(Packed64SingleBlock.create(valueCount, bpv));
}
}
return packedInts; return packedInts;
} }
@ -242,21 +251,27 @@ public class TestPackedInts extends LuceneTestCase {
} }
public void testSingleValue() throws Exception { public void testSingleValue() throws Exception {
for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
Directory dir = newDirectory(); Directory dir = newDirectory();
IndexOutput out = dir.createOutput("out", newIOContext(random())); IndexOutput out = dir.createOutput("out", newIOContext(random()));
PackedInts.Writer w = PackedInts.getWriter(out, 1, 8); PackedInts.Writer w = PackedInts.getWriter(out, 1, bitsPerValue, PackedInts.DEFAULT);
w.add(17); long value = 17L & PackedInts.maxValue(bitsPerValue);
w.add(value);
w.finish(); w.finish();
final long end = out.getFilePointer(); final long end = out.getFilePointer();
out.close(); out.close();
IndexInput in = dir.openInput("out", newIOContext(random())); IndexInput in = dir.openInput("out", newIOContext(random()));
PackedInts.getReader(in); Reader reader = PackedInts.getReader(in);
assertEquals(end, in.getFilePointer()); String msg = "Impl=" + w.getClass().getSimpleName() + ", bitsPerValue=" + bitsPerValue;
assertEquals(msg, 1, reader.size());
assertEquals(msg, value, reader.get(0));
assertEquals(msg, end, in.getFilePointer());
in.close(); in.close();
dir.close(); dir.close();
} }
}
public void testSecondaryBlockChange() throws IOException { public void testSecondaryBlockChange() throws IOException {
PackedInts.Mutable mutable = new Packed64(26, 5); PackedInts.Mutable mutable = new Packed64(26, 5);
@ -276,15 +291,36 @@ public class TestPackedInts extends LuceneTestCase {
int INDEX = (int)Math.pow(2, 30)+1; int INDEX = (int)Math.pow(2, 30)+1;
int BITS = 2; int BITS = 2;
Packed32 p32 = new Packed32(INDEX, BITS);
p32.set(INDEX-1, 1);
assertEquals("The value at position " + (INDEX-1)
+ " should be correct for Packed32", 1, p32.get(INDEX-1));
p32 = null; // To free the 256MB used
Packed64 p64 = new Packed64(INDEX, BITS); Packed64 p64 = new Packed64(INDEX, BITS);
p64.set(INDEX-1, 1); p64.set(INDEX-1, 1);
assertEquals("The value at position " + (INDEX-1) assertEquals("The value at position " + (INDEX-1)
+ " should be correct for Packed64", 1, p64.get(INDEX-1)); + " should be correct for Packed64", 1, p64.get(INDEX-1));
p64 = null;
for (int bits = 1; bits <=64; ++bits) {
if (Packed64SingleBlock.isSupported(bits)) {
int index = Integer.MAX_VALUE / bits + (bits == 1 ? 0 : 1);
Packed64SingleBlock p64sb = Packed64SingleBlock.create(index, bits);
p64sb.set(index - 1, 1);
assertEquals("The value at position " + (index-1)
+ " should be correct for " + p64sb.getClass().getSimpleName(),
1, p64sb.get(index-1));
} }
} }
int index = Integer.MAX_VALUE / 24 + 1;
Packed8ThreeBlocks p8 = new Packed8ThreeBlocks(index);
p8.set(index - 1, 1);
assertEquals("The value at position " + (index-1)
+ " should be correct for Packed8ThreeBlocks", 1, p8.get(index-1));
p8 = null;
index = Integer.MAX_VALUE / 48 + 1;
Packed16ThreeBlocks p16 = new Packed16ThreeBlocks(index);
p16.set(index - 1, 1);
assertEquals("The value at position " + (index-1)
+ " should be correct for Packed16ThreeBlocks", 1, p16.get(index-1));
p16 = null;
}
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.packed.PackedInts;
/** /**
* Use a field value and find the Document Frequency within another field. * Use a field value and find the Document Frequency within another field.
@ -52,7 +53,7 @@ public class JoinDocFreqValueSource extends FieldCacheSource {
@Override @Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
{ {
final DocTerms terms = cache.getTerms(readerContext.reader(), field, true ); final DocTerms terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST);
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader(); final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
return new IntDocValues(this) { return new IntDocValues(this) {