LUCENE-4062: add new aligned packed bits impls for faster performance

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1342751 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-05-25 18:09:39 +00:00
parent f4819005cf
commit 6a4a717220
29 changed files with 1095 additions and 452 deletions


@ -921,6 +921,11 @@ Optimizations
* LUCENE-4061: improve synchronization in DirectoryTaxonomyWriter.addCategory
and few general improvements to DirectoryTaxonomyWriter.
(Shai Erera, Gilad Barkai)
* LUCENE-4062: Add new aligned packed bits impls for faster lookup
performance; add float acceptableOverheadRatio to getWriter and
getMutable API to give packed ints freedom to pick faster
implementations (Adrien Grand via Mike McCandless)
Bug fixes

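The knob threaded through this commit is a single float, acceptableOverheadRatio, passed to PackedInts.getMutable and PackedInts.getWriter. A minimal sketch of how a caller would use it, assuming the lucene-core jar built from this revision is on the classpath (the demo class itself is made up):

import org.apache.lucene.util.packed.PackedInts;

public class PackedIntsOverheadDemo {
  public static void main(String[] args) {
    int valueCount = 1000;
    int bitsPerValue = PackedInts.bitsRequired(999); // 10 bits for values up to 999

    // COMPACT (0% overhead) keeps the tightest packing, possibly with slower access.
    PackedInts.Mutable compact =
        PackedInts.getMutable(valueCount, bitsPerValue, PackedInts.COMPACT);

    // FAST (up to 50% overhead) lets the factory round up to an aligned,
    // faster implementation.
    PackedInts.Mutable fast =
        PackedInts.getMutable(valueCount, bitsPerValue, PackedInts.FAST);

    compact.set(0, 999);
    fast.set(0, 999);
    System.out.println(compact.get(0) + " " + fast.get(0)); // 999 999
  }
}

The same float replaces the old boolean fasterButMoreRam/fasterButMoreRAM flags in the doc values and FieldCache call sites changed below.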

@ -328,8 +328,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// we'd have to try @ fewer bits and then grow
// if we overflowed it.
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue());
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue());
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
termsDictOffsets = termsDictOffsetsM;
termOffsets = termOffsetsM;


@ -183,7 +183,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
// write primary terms dict offsets
packedIndexStart = out.getFilePointer();
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer));
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT);
// relative to our indexStart
long upto = 0;
@ -196,7 +196,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
packedOffsetsStart = out.getFilePointer();
// write offsets into the byte[] terms
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength));
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength), PackedInts.DEFAULT);
upto = 0;
for(int i=0;i<numIndexTerms;i++) {
w.add(upto);


@ -74,7 +74,7 @@ class TermInfosReaderIndex {
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, false);
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);
String currentField = null;
List<String> fieldStrs = new ArrayList<String>();


@ -115,17 +115,19 @@ public final class Bytes {
* {@link Writer}. A call to {@link Writer#finish(int)} will release
* all internally used resources and frees the memory tracking
* reference.
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
* @param acceptableOverheadRatio
* how to trade space for speed. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @param context I/O Context
* @return a new {@link Writer} instance
* @throws IOException
* if the files for the writer can not be created.
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
*/
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
boolean fixedSize, Comparator<BytesRef> sortComparator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
Counter bytesUsed, IOContext context, float acceptableOverheadRatio)
throws IOException {
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
@ -139,7 +141,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
}
} else {
if (mode == Mode.STRAIGHT) {
@ -147,7 +149,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
}
}
@ -382,32 +384,32 @@ public final class Bytes {
protected int lastDocId = -1;
protected int[] docToEntry;
protected final BytesRefHash hash;
protected final boolean fasterButMoreRam;
protected final float acceptableOverheadRatio;
protected long maxBytes = 0;
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int codecVersion, Counter bytesUsed, IOContext context, Type type)
throws IOException {
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, PackedInts.DEFAULT, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
int codecVersion, Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type)
throws IOException {
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, acceptableOverheadRatio, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) throws IOException {
super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
docToEntry = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
this.fasterButMoreRam = fasterButMoreRam;
this.acceptableOverheadRatio = acceptableOverheadRatio;
}
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
@ -506,7 +508,7 @@ public final class Bytes {
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, int[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
bitsRequired(maxValue));
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
assert toEntry.length >= limit -1;
@ -530,7 +532,7 @@ public final class Bytes {
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, long[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
bitsRequired(maxValue));
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
assert toEntry.length >= limit -1;
@ -550,11 +552,6 @@ public final class Bytes {
}
w.finish();
}
protected int bitsRequired(long maxValue){
return fasterButMoreRam ?
PackedInts.getNextFixedSize(PackedInts.bitsRequired(maxValue)) : PackedInts.bitsRequired(maxValue);
}
}

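The bitsRequired() helper removed above used to round the bit width up via PackedInts.getNextFixedSize when fasterButMoreRam was set; the writers now pass the exact PackedInts.bitsRequired(maxValue) together with an overhead budget and let PackedInts pick the encoder. A standalone sketch of that pattern, with the directory, file name and values invented for illustration:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.packed.PackedInts;

public class PackedWriterSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("demo.pck", IOContext.DEFAULT);
    int docCount = 100;
    long maxValue = 123456;
    // exact bit width plus an acceptable overhead replaces the manual rounding
    PackedInts.Writer w = PackedInts.getWriter(out, docCount,
        PackedInts.bitsRequired(maxValue), PackedInts.DEFAULT);
    for (int i = 0; i < docCount; i++) {
      w.add(i); // any value <= maxValue is legal
    }
    w.finish();
    out.close();
    dir.close();
  }
}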

@ -31,6 +31,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.packed.PackedInts;
/**
* Abstract base class for PerDocConsumer implementations
@ -41,7 +42,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
protected final String segmentName;
private final Counter bytesUsed;
protected final IOContext context;
private final boolean fasterButMoreRam;
private final float acceptableOverheadRatio;
/**
* Filename extension for index files
@ -57,20 +58,22 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
* @param state The state to initiate a {@link PerDocConsumer} instance
*/
protected DocValuesWriterBase(PerDocWriteState state) {
this(state, true);
this(state, PackedInts.FAST);
}
/**
* @param state The state to initiate a {@link PerDocConsumer} instance
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
* @param acceptableOverheadRatio
* how to trade space for speed. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
*/
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) {
protected DocValuesWriterBase(PerDocWriteState state, float acceptableOverheadRatio) {
this.segmentName = state.segmentName;
this.bytesUsed = state.bytesUsed;
this.context = state.context;
this.fasterButMoreRam = fasterButMoreRam;
this.acceptableOverheadRatio = acceptableOverheadRatio;
}
protected abstract Directory getDirectory() throws IOException;
@ -83,7 +86,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
return Writer.create(valueType,
PerDocProducerBase.docValuesId(segmentName, field.number),
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
getDirectory(), getComparator(), bytesUsed, context, acceptableOverheadRatio);
}


@ -58,8 +58,8 @@ class FixedSortedBytesImpl {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_FIXED_SORTED);
this.comp = comp;
}
@ -77,7 +77,7 @@ class FixedSortedBytesImpl {
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(maxOrd);
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd));
PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter);
}


@ -103,7 +103,7 @@ class PackedIntValues {
: ++maxValue - minValue;
datOut.writeLong(defaultValue);
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
PackedInts.bitsRequired(maxValue - minValue));
PackedInts.bitsRequired(maxValue - minValue), PackedInts.DEFAULT);
for (int i = 0; i < lastDocID + 1; i++) {
set(bytesRef, i);
byte[] bytes = bytesRef.bytes;


@ -60,8 +60,8 @@ final class VarSortedBytesImpl {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_VAR_SORTED);
this.comp = comp;
size = 0;
}
@ -83,7 +83,7 @@ final class VarSortedBytesImpl {
idxOut.writeLong(maxBytes);
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
PackedInts.bitsRequired(maxBytes));
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
offsetWriter.add(0);
for (int i = 0; i < maxOrd; i++) {
offsetWriter.add(offsets[i]);
@ -91,7 +91,7 @@ final class VarSortedBytesImpl {
offsetWriter.finish();
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd-1));
PackedInts.bitsRequired(maxOrd-1), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter);
}
@ -127,7 +127,7 @@ final class VarSortedBytesImpl {
// total bytes of data
idxOut.writeLong(maxBytes);
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
bitsRequired(maxBytes));
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
// first dump bytes data, recording index & write offset as
// we go
final BytesRef spare = new BytesRef();


@ -198,7 +198,7 @@ class VarStraightBytesImpl {
if (lastDocID == -1) {
idxOut.writeVLong(0);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(0));
PackedInts.bitsRequired(0), PackedInts.DEFAULT);
// docCount+1 so we write sentinel
for (int i = 0; i < docCount+1; i++) {
w.add(0);
@ -208,7 +208,7 @@ class VarStraightBytesImpl {
fill(docCount, address);
idxOut.writeVLong(address);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(address));
PackedInts.bitsRequired(address), PackedInts.DEFAULT);
for (int i = 0; i < docCount; i++) {
w.add(docToAddress[i]);
}


@ -25,6 +25,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.packed.PackedInts;
/**
* Abstract API for per-document stored primitive values of type <tt>byte[]</tt>
@ -77,14 +78,16 @@ abstract class Writer extends DocValuesConsumer {
* the {@link Directory} to create the files from.
* @param bytesUsed
* a byte-usage tracking reference
* @param fasterButMoreRam Whether the space used for packed ints should be rounded up for higher lookup performance.
* Currently this parameter only applies for types {@link Type#BYTES_VAR_SORTED}
* and {@link Type#BYTES_FIXED_SORTED}.
* @param acceptableOverheadRatio
* how to trade space for speed. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @return a new {@link Writer} instance for the given {@link Type}
* @throws IOException
* @see PackedInts#getReader(org.apache.lucene.store.DataInput, float)
*/
public static DocValuesConsumer create(Type type, String id, Directory directory,
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
@ -101,22 +104,22 @@ abstract class Writer extends DocValuesConsumer {
return Floats.getWriter(directory, id, bytesUsed, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
bytesUsed, context, fasterButMoreRam);
bytesUsed, context, acceptableOverheadRatio);
case BYTES_FIXED_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
bytesUsed, context, fasterButMoreRam);
bytesUsed, context, acceptableOverheadRatio);
case BYTES_FIXED_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
bytesUsed, context, fasterButMoreRam);
bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
bytesUsed, context, fasterButMoreRam);
bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
bytesUsed, context, fasterButMoreRam);
bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
bytesUsed, context, fasterButMoreRam);
bytesUsed, context, acceptableOverheadRatio);
default:
throw new IllegalArgumentException("Unknown Values: " + type);
}


@ -494,7 +494,7 @@ public interface FieldCache {
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public DocTerms getTerms (AtomicReader reader, String field, boolean fasterButMoreRAM)
public DocTerms getTerms (AtomicReader reader, String field, float acceptableOverheadRatio)
throws IOException;
/** Returned by {@link #getTermsIndex} */
@ -571,7 +571,7 @@ public interface FieldCache {
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, boolean fasterButMoreRAM)
public DocTermsIndex getTermsIndex (AtomicReader reader, String field, float acceptableOverheadRatio)
throws IOException;
/**

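The FieldCache entry points now take the same float. A hedged sketch of a caller; obtaining the AtomicReader is out of scope here, FieldCache.DEFAULT is the usual singleton, and getOrd is assumed from the existing DocTermsIndex API:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.packed.PackedInts;

public class FieldCacheOverheadDemo {
  /** Uninverts "field", trading up to 50% packed-ints overhead for faster lookups. */
  static int firstOrd(AtomicReader reader, String field) throws IOException {
    FieldCache.DocTermsIndex index =
        FieldCache.DEFAULT.getTermsIndex(reader, field, PackedInts.FAST);
    return index.getOrd(0); // ord of the first document's term
  }
}

As the javadoc above notes, the first call for a given reader and field wins the cache entry, so the ratio only matters on that first call.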

@ -1071,14 +1071,12 @@ class FieldCacheImpl implements FieldCache {
}
}
private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true;
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
return getTermsIndex(reader, field, PackedInts.FAST);
}
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException {
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false);
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
}
static class DocTermsIndexCache extends Cache {
@ -1092,7 +1090,7 @@ class FieldCacheImpl implements FieldCache {
Terms terms = reader.terms(entryKey.field);
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
final PagedBytes bytes = new PagedBytes(15);
@ -1142,8 +1140,8 @@ class FieldCacheImpl implements FieldCache {
startNumUniqueTerms = 1;
}
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, fasterButMoreRAM);
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
@ -1219,11 +1217,11 @@ class FieldCacheImpl implements FieldCache {
// TODO: this if DocTermsIndex was already created, we
// should share it...
public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
return getTerms(reader, field, PackedInts.FAST);
}
public DocTerms getTerms(AtomicReader reader, String field, boolean fasterButMoreRAM) throws IOException {
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)), false);
public DocTerms getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
}
static final class DocTermsCache extends Cache {
@ -1237,7 +1235,7 @@ class FieldCacheImpl implements FieldCache {
Terms terms = reader.terms(entryKey.field);
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
final int termCountHardLimit = reader.maxDoc();
@ -1268,7 +1266,7 @@ class FieldCacheImpl implements FieldCache {
startBPV = 1;
}
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), acceptableOverheadRatio);
// pointer==0 means not set
bytes.copyUsingLengthPrefix(new BytesRef());


@ -0,0 +1,54 @@
package org.apache.lucene.util.packed;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
final class DirectPacked64SingleBlockReader extends PackedInts.ReaderImpl {
private final IndexInput in;
private final long startPointer;
private final int valuesPerBlock;
private final long mask;
DirectPacked64SingleBlockReader(int bitsPerValue, int valueCount,
IndexInput in) {
super(valueCount, bitsPerValue);
this.in = in;
startPointer = in.getFilePointer();
valuesPerBlock = 64 / bitsPerValue;
mask = ~(~0L << bitsPerValue);
}
@Override
public long get(int index) {
final int blockOffset = index / valuesPerBlock;
final long skip = ((long) blockOffset) << 3;
try {
in.seek(startPointer + skip);
long block = in.readLong();
final int offsetInBlock = index % valuesPerBlock;
return (block >>> (offsetInBlock * bitsPerValue)) & mask;
} catch (IOException e) {
throw new IllegalStateException("failed", e);
}
}
}

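A worked example of the addressing arithmetic used by DirectPacked64SingleBlockReader.get above, in plain Java with no Lucene types; the stored value is invented for illustration:

public class SingleBlockAddressingDemo {
  public static void main(String[] args) {
    final int bitsPerValue = 12;
    final int valuesPerBlock = 64 / bitsPerValue;     // 5 values per long, 4 bits wasted
    final long mask = ~(~0L << bitsPerValue);         // 0xFFF

    final int index = 7;
    final int blockOffset = index / valuesPerBlock;   // block #1
    final long byteSkip = ((long) blockOffset) << 3;  // blocks are 8-byte longs
    final int offsetInBlock = index % valuesPerBlock; // slot 2 inside that block

    // pretend this is the long read after seeking startPointer + byteSkip,
    // with the value 0xABC sitting in slot 2
    final long block = 0xABCL << (offsetInBlock * bitsPerValue);
    final long value = (block >>> (offsetInBlock * bitsPerValue)) & mask;

    System.out.println("skip=" + byteSkip + " bytes, slot=" + offsetInBlock
        + ", value=0x" + Long.toHexString(value));    // skip=8 bytes, slot=2, value=0xabc
  }
}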

@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
import java.io.IOException;
/* Reads directly from disk on each get */
final class DirectReader implements PackedInts.Reader {
final class DirectPackedReader extends PackedInts.ReaderImpl {
private final IndexInput in;
private final long startPointer;
private final int bitsPerValue;
private final int valueCount;
private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
private static final int MOD_MASK = Packed64.MOD_MASK;
@ -34,10 +32,9 @@ final class DirectReader implements PackedInts.Reader {
// masks[n-1] masks for bottom n bits
private final long[] masks;
public DirectReader(int bitsPerValue, int valueCount, IndexInput in)
public DirectPackedReader(int bitsPerValue, int valueCount, IndexInput in)
throws IOException {
this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue;
super(valueCount, bitsPerValue);
this.in = in;
long v = 1;
@ -50,26 +47,6 @@ final class DirectReader implements PackedInts.Reader {
startPointer = in.getFilePointer();
}
@Override
public int getBitsPerValue() {
return bitsPerValue;
}
@Override
public int size() {
return valueCount;
}
@Override
public boolean hasArray() {
return false;
}
@Override
public Object getArray() {
return null;
}
@Override
public long get(int index) {
final long majorBitPos = (long)index * bitsPerValue;


@ -28,22 +28,14 @@ public class GrowableWriter implements PackedInts.Mutable {
private long currentMaxValue;
private PackedInts.Mutable current;
private final boolean roundFixedSize;
private final float acceptableOverheadRatio;
public GrowableWriter(int startBitsPerValue, int valueCount, boolean roundFixedSize) {
this.roundFixedSize = roundFixedSize;
current = PackedInts.getMutable(valueCount, getSize(startBitsPerValue));
public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
this.acceptableOverheadRatio = acceptableOverheadRatio;
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
}
private final int getSize(int bpv) {
if (roundFixedSize) {
return PackedInts.getNextFixedSize(bpv);
} else {
return bpv;
}
}
public long get(int index) {
return current.get(index);
}
@ -78,7 +70,7 @@ public class GrowableWriter implements PackedInts.Mutable {
currentMaxValue *= 2;
}
final int valueCount = size();
PackedInts.Mutable next = PackedInts.getMutable(valueCount, getSize(bpv));
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bpv, acceptableOverheadRatio);
for(int i=0;i<valueCount;i++) {
next.set(i, current.get(i));
}
@ -93,11 +85,12 @@ public class GrowableWriter implements PackedInts.Mutable {
}
public GrowableWriter resize(int newSize) {
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, roundFixedSize);
GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
final int limit = Math.min(size(), newSize);
for(int i=0;i<limit;i++) {
next.set(i, get(i));
}
return next;
}
}

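A small sketch of the updated GrowableWriter contract: the acceptableOverheadRatio given at construction is reused every time the writer re-packs itself into a wider PackedInts.Mutable (the demo class is made up):

import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;

public class GrowableWriterDemo {
  public static void main(String[] args) {
    // start at 2 bits per value; growth keeps honoring the same overhead budget
    GrowableWriter gw = new GrowableWriter(2, 10, PackedInts.DEFAULT);
    gw.set(0, 3);    // fits in the initial 2 bits
    gw.set(1, 1000); // forces a re-pack into a wider implementation
    System.out.println(gw.get(1) + " @ " + gw.getBitsPerValue() + " bits/value");
  }
}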

@ -0,0 +1,85 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** 48 bitsPerValue backed by short[] */
final class Packed16ThreeBlocks extends PackedInts.ReaderImpl
implements PackedInts.Mutable {
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
private final short[] blocks;
Packed16ThreeBlocks(int valueCount) {
super(valueCount, 48);
if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
}
this.blocks = new short[3 * valueCount];
}
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < blocks.length; i++) {
blocks[i] = in.readShort();
}
final int mod = blocks.length % 4;
if (mod != 0) {
final int pad = 4 - mod;
// round out long
for (int i = 0; i < pad; i++) {
in.readShort();
}
}
}
@Override
public long get(int index) {
final int o = index * 3;
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
}
@Override
public void set(int index, long value) {
final int o = index * 3;
blocks[o] = (short) (value >> 32);
blocks[o+1] = (short) (value >> 16);
blocks[o+2] = (short) value;
}
@Override
public void clear() {
Arrays.fill(blocks, (short) 0);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(blocks);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
}


@ -1,227 +0,0 @@
package org.apache.lucene.util.packed;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Space optimized random access capable array of values with a fixed number of
* bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher
* numbers.
* </p><p>
* The implementation strives to avoid conditionals and expensive operations,
* sacrificing code clarity to achieve better performance.
*/
class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
static final int BLOCK_SIZE = 32; // 32 = int, 64 = long
static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
private static final int ENTRY_SIZE = BLOCK_SIZE + 1;
private static final int FAC_BITPOS = 3;
/*
* In order to make an efficient value-getter, conditionals should be
* avoided. A value can be positioned inside of a block, requiring shifting
* left or right or it can span two blocks, requiring a left-shift on the
* first block and a right-shift on the right block.
* </p><p>
* By always shifting the first block both left and right, we get exactly
* the right bits. By always shifting the second block right and applying
* a mask, we get the right bits there. After that, we | the two bitsets.
*/
private static final int[][] SHIFTS =
new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE];
static { // Generate shifts
for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
int[] currentShifts = SHIFTS[elementBits];
int base = bitPos * FAC_BITPOS;
currentShifts[base ] = bitPos;
currentShifts[base + 1] = BLOCK_SIZE - elementBits;
if (bitPos <= BLOCK_SIZE - elementBits) { // Single block
currentShifts[base + 2] = 0;
MASKS[elementBits][bitPos] = 0;
} else { // Two blocks
int rBits = elementBits - (BLOCK_SIZE - bitPos);
currentShifts[base + 2] = BLOCK_SIZE - rBits;
MASKS[elementBits][bitPos] = ~(~0 << rBits);
}
}
}
}
/*
* The setter requires more masking than the getter.
*/
private static final int[][] WRITE_MASKS =
new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
static {
for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
int elementPosMask = ~(~0 << elementBits);
int[] currentShifts = SHIFTS[elementBits];
int[] currentMasks = WRITE_MASKS[elementBits];
for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
int base = bitPos * FAC_BITPOS;
currentMasks[base ] =~((elementPosMask
<< currentShifts[base + 1])
>>> currentShifts[base]);
if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used
currentMasks[base+1] = ~0; // Keep all bits
currentMasks[base+2] = 0; // Or with 0
} else {
currentMasks[base+1] = ~(elementPosMask
<< currentShifts[base + 2]);
currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0;
}
}
}
}
/* The bits */
private int[] blocks;
// Cached calculations
private int maxPos; // blocks.length * BLOCK_SIZE / bitsPerValue - 1
private int[] shifts; // The shifts for the current bitsPerValue
private int[] readMasks;
private int[] writeMasks;
/**
* Creates an array with the internal structures adjusted for the given
* limits and initialized to 0.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* Note: bitsPerValue >32 is not supported by this implementation.
*/
public Packed32(int valueCount, int bitsPerValue) {
this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)],
valueCount, bitsPerValue);
}
/**
* Creates an array with content retrieved from the given DataInput.
* @param in a DataInput, positioned at the start of Packed64-content.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @throws java.io.IOException if the values for the backing array could not
* be retrieved.
*/
public Packed32(DataInput in, int valueCount, int bitsPerValue)
throws IOException {
super(valueCount, bitsPerValue);
int size = size(bitsPerValue, valueCount);
blocks = new int[size + 1]; // +1 due to non-conditional tricks
// TODO: find a faster way to bulk-read ints...
for(int i = 0 ; i < size ; i++) {
blocks[i] = in.readInt();
}
if (size % 2 == 1) {
in.readInt(); // Align to long
}
updateCached();
}
private static int size(int bitsPerValue, int valueCount) {
final long totBitCount = (long) valueCount * bitsPerValue;
return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1));
}
/**
* Creates an array backed by the given blocks.
* </p><p>
* Note: The blocks are used directly, so changes to the given block will
* affect the Packed32-structure.
* @param blocks used as the internal backing array.
* @param valueCount the number of values.
* @param bitsPerValue the number of bits available for any given value.
* Note: bitsPerValue >32 is not supported by this implementation.
*/
public Packed32(int[] blocks, int valueCount, int bitsPerValue) {
// TODO: Check that blocks.length is sufficient for holding length values
super(valueCount, bitsPerValue);
if (bitsPerValue > 31) {
throw new IllegalArgumentException(String.format(
"This array only supports values of 31 bits or less. The "
+ "required number of bits was %d. The Packed64 "
+ "implementation allows values with more than 31 bits",
bitsPerValue));
}
this.blocks = blocks;
updateCached();
}
private void updateCached() {
readMasks = MASKS[bitsPerValue];
maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2);
shifts = SHIFTS[bitsPerValue];
writeMasks = WRITE_MASKS[bitsPerValue];
}
/**
* @param index the position of the value.
* @return the value at the given index.
*/
public long get(final int index) {
assert index >= 0 && index < size();
final long majorBitPos = (long)index * bitsPerValue;
final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
final int base = bitPos * FAC_BITPOS;
return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
}
public void set(final int index, final long value) {
final int intValue = (int)value;
final long majorBitPos = (long)index * bitsPerValue;
final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
final int base = bitPos * FAC_BITPOS;
blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base])
| (intValue << shifts[base + 1] >>> shifts[base]);
blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1])
| ((intValue << shifts[base + 2])
& writeMasks[base+2]);
}
public void clear() {
Arrays.fill(blocks, 0);
}
@Override
public String toString() {
return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos
+ ", elements.length=" + blocks.length + ")";
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(blocks);
}
}


@ -0,0 +1,365 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
* This class is similar to {@link Packed64} except that it trades space for
* speed by ensuring that a single block needs to be read/written in order to
* read/write a value.
*/
abstract class Packed64SingleBlock extends PackedInts.ReaderImpl
implements PackedInts.Mutable {
private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {1, 2, 3, 4,
5, 6, 7, 9, 10, 12, 21};
private static final long[][] WRITE_MASKS = new long[22][];
private static final int[][] SHIFTS = new int[22][];
static {
for (int bpv : SUPPORTED_BITS_PER_VALUE) {
initMasks(bpv);
}
}
protected static void initMasks(int bpv) {
int valuesPerBlock = Long.SIZE / bpv;
long[] writeMasks = new long[valuesPerBlock];
int[] shifts = new int[valuesPerBlock];
long bits = (1L << bpv) - 1;
for (int i = 0; i < valuesPerBlock; ++i) {
shifts[i] = bpv * i;
writeMasks[i] = ~(bits << shifts[i]);
}
WRITE_MASKS[bpv] = writeMasks;
SHIFTS[bpv] = shifts;
}
public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {
switch (bitsPerValue) {
case 1:
return new Packed64SingleBlock1(valueCount);
case 2:
return new Packed64SingleBlock2(valueCount);
case 3:
return new Packed64SingleBlock3(valueCount);
case 4:
return new Packed64SingleBlock4(valueCount);
case 5:
return new Packed64SingleBlock5(valueCount);
case 6:
return new Packed64SingleBlock6(valueCount);
case 7:
return new Packed64SingleBlock7(valueCount);
case 9:
return new Packed64SingleBlock9(valueCount);
case 10:
return new Packed64SingleBlock10(valueCount);
case 12:
return new Packed64SingleBlock12(valueCount);
case 21:
return new Packed64SingleBlock21(valueCount);
default:
throw new IllegalArgumentException("Unsupported bitsPerValue: "
+ bitsPerValue);
}
}
public static Packed64SingleBlock create(DataInput in,
int valueCount, int bitsPerValue) throws IOException {
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
for (int i = 0; i < reader.blocks.length; ++i) {
reader.blocks[i] = in.readLong();
}
return reader;
}
public static boolean isSupported(int bitsPerValue) {
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
}
public static float overheadPerValue(int bitsPerValue) {
int valuesPerBlock = 64 / bitsPerValue;
int overhead = 64 % bitsPerValue;
return (float) overhead / valuesPerBlock;
}
protected final long[] blocks;
protected final int valuesPerBlock;
protected final int[] shifts;
protected final long[] writeMasks;
protected final long readMask;
Packed64SingleBlock(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
valuesPerBlock = Long.SIZE / bitsPerValue;
blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
shifts = SHIFTS[bitsPerValue];
writeMasks = WRITE_MASKS[bitsPerValue];
readMask = ~writeMasks[0];
}
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
}
protected int blockOffset(int offset) {
return offset / valuesPerBlock;
}
protected int offsetInBlock(int offset) {
return offset % valuesPerBlock;
}
@Override
public long get(int index) {
final int o = blockOffset(index);
final int b = offsetInBlock(index);
return (blocks[o] >> shifts[b]) & readMask;
}
@Override
public void set(int index, long value) {
final int o = blockOffset(index);
final int b = offsetInBlock(index);
blocks[o] = (blocks[o] & writeMasks[b]) | (value << shifts[b]);
}
@Override
public void clear() {
Arrays.fill(blocks, 0L);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(blocks);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
// Specialisations that allow the JVM to optimize computation of the block
// offset as well as the offset in block
static final class Packed64SingleBlock21 extends Packed64SingleBlock {
Packed64SingleBlock21(int valueCount) {
super(valueCount, 21);
assert valuesPerBlock == 3;
}
@Override
protected int blockOffset(int offset) {
return offset / 3;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 3;
}
}
static final class Packed64SingleBlock12 extends Packed64SingleBlock {
Packed64SingleBlock12(int valueCount) {
super(valueCount, 12);
assert valuesPerBlock == 5;
}
@Override
protected int blockOffset(int offset) {
return offset / 5;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 5;
}
}
static final class Packed64SingleBlock10 extends Packed64SingleBlock {
Packed64SingleBlock10(int valueCount) {
super(valueCount, 10);
assert valuesPerBlock == 6;
}
@Override
protected int blockOffset(int offset) {
return offset / 6;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 6;
}
}
static final class Packed64SingleBlock9 extends Packed64SingleBlock {
Packed64SingleBlock9(int valueCount) {
super(valueCount, 9);
assert valuesPerBlock == 7;
}
@Override
protected int blockOffset(int offset) {
return offset / 7;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 7;
}
}
static final class Packed64SingleBlock7 extends Packed64SingleBlock {
Packed64SingleBlock7(int valueCount) {
super(valueCount, 7);
assert valuesPerBlock == 9;
}
@Override
protected int blockOffset(int offset) {
return offset / 9;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 9;
}
}
static final class Packed64SingleBlock6 extends Packed64SingleBlock {
Packed64SingleBlock6(int valueCount) {
super(valueCount, 6);
assert valuesPerBlock == 10;
}
@Override
protected int blockOffset(int offset) {
return offset / 10;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 10;
}
}
static final class Packed64SingleBlock5 extends Packed64SingleBlock {
Packed64SingleBlock5(int valueCount) {
super(valueCount, 5);
assert valuesPerBlock == 12;
}
@Override
protected int blockOffset(int offset) {
return offset / 12;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 12;
}
}
static final class Packed64SingleBlock4 extends Packed64SingleBlock {
Packed64SingleBlock4(int valueCount) {
super(valueCount, 4);
assert valuesPerBlock == 16;
}
@Override
protected int blockOffset(int offset) {
return offset >> 4;
}
@Override
protected int offsetInBlock(int offset) {
return offset & 15;
}
}
static final class Packed64SingleBlock3 extends Packed64SingleBlock {
Packed64SingleBlock3(int valueCount) {
super(valueCount, 3);
assert valuesPerBlock == 21;
}
@Override
protected int blockOffset(int offset) {
return offset / 21;
}
@Override
protected int offsetInBlock(int offset) {
return offset % 21;
}
}
static final class Packed64SingleBlock2 extends Packed64SingleBlock {
Packed64SingleBlock2(int valueCount) {
super(valueCount, 2);
assert valuesPerBlock == 32;
}
@Override
protected int blockOffset(int offset) {
return offset >> 5;
}
@Override
protected int offsetInBlock(int offset) {
return offset & 31;
}
}
static final class Packed64SingleBlock1 extends Packed64SingleBlock {
Packed64SingleBlock1(int valueCount) {
super(valueCount, 1);
assert valuesPerBlock == 64;
}
@Override
protected int blockOffset(int offset) {
return offset >> 6;
}
@Override
protected int offsetInBlock(int offset) {
return offset & 63;
}
}
}

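Packed64SingleBlock is package-private, so the snippet below simply mirrors its overheadPerValue math to show the space cost of single-block packing for each supported bit width (pure arithmetic, no Lucene types):

public class SingleBlockOverheadDemo {
  public static void main(String[] args) {
    int[] supported = {1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 21};
    for (int bpv : supported) {
      int valuesPerBlock = 64 / bpv; // values stored in each long
      int wastedBits = 64 % bpv;     // bits left unused per block
      float overheadPerValue = (float) wastedBits / valuesPerBlock;
      System.out.printf("bpv=%2d  values/block=%2d  overhead=%.3f bits/value%n",
          bpv, valuesPerBlock, overheadPerValue);
    }
    // e.g. bpv=21 -> 3 values per block, 1 wasted bit, 0.333 bits/value;
    // bpv=12 -> 5 values per block, 4 wasted bits, 0.8 bits/value
  }
}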

@ -0,0 +1,88 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {
private long pending;
private int shift;
private final long mask;
private int position;
Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
throws IOException {
super(valueCount, bitsPerValue, in);
pending = 0;
shift = 64;
mask = ~(~0L << bitsPerValue);
position = -1;
}
@Override
public long next() throws IOException {
if (shift + bitsPerValue > 64) {
pending = in.readLong();
shift = 0;
}
final long next = (pending >>> shift) & mask;
shift += bitsPerValue;
++position;
return next;
}
@Override
public int ord() {
return position;
}
@Override
public long advance(int ord) throws IOException {
assert ord < valueCount : "ord must be less than valueCount";
assert ord > position : "ord must be greater than the current position";
final int valuesPerBlock = 64 / bitsPerValue;
final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
final long targetBlock = ord / valuesPerBlock;
final long blocksToSkip = targetBlock - nextBlock;
if (blocksToSkip > 0) {
final long skip = blocksToSkip << 3;
final long filePointer = in.getFilePointer();
in.seek(filePointer + skip);
shift = 64;
final int offsetInBlock = ord % valuesPerBlock;
for (int i = 0; i < offsetInBlock; ++i) {
next();
}
} else {
for (int i = position; i < ord - 1; ++i) {
next();
}
}
position = ord - 1;
return next();
}
}


@ -0,0 +1,81 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts.Writer;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link Writer} for {@link Packed64SingleBlock} readers.
*/
final class Packed64SingleBlockWriter extends Writer {
private long pending;
private int shift;
private int written;
Packed64SingleBlockWriter(DataOutput out, int valueCount,
int bitsPerValue) throws IOException {
super(out, valueCount, bitsPerValue);
assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
pending = 0;
shift = 0;
written = 0;
}
@Override
protected int getFormat() {
return PackedInts.PACKED_SINGLE_BLOCK;
}
@Override
public void add(long v) throws IOException {
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
assert v >= 0;
if (shift + bitsPerValue > Long.SIZE) {
out.writeLong(pending);
pending = 0;
shift = 0;
}
pending |= v << shift;
shift += bitsPerValue;
++written;
}
@Override
public void finish() throws IOException {
while (written < valueCount) {
add(0L); // Auto flush
}
if (shift > 0) {
// add was called at least once
out.writeLong(pending);
}
}
@Override
public String toString() {
return "Packed64SingleBlockWriter(written " + written + "/" + valueCount + " with "
+ bitsPerValue + " bits/value)";
}
}


@ -0,0 +1,86 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** 24 bitsPerValue backed by byte[] */
final class Packed8ThreeBlocks extends PackedInts.ReaderImpl
implements PackedInts.Mutable {
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
private final byte[] blocks;
Packed8ThreeBlocks(int valueCount) {
super(valueCount, 24);
if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
}
this.blocks = new byte[3 * valueCount];
}
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < blocks.length; i++) {
blocks[i] = in.readByte();
}
final int mod = blocks.length % 8;
if (mod != 0) {
final int pad = 8 - mod;
// round out long
for (int i = 0; i < pad; i++) {
in.readByte();
}
}
}
@Override
public long get(int index) {
final int o = index * 3;
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
}
@Override
public void set(int index, long value) {
final int o = index * 3;
blocks[o+2] = (byte) value;
blocks[o+1] = (byte) (value >> 8);
blocks[o] = (byte) (value >> 16);
}
@Override
public void clear() {
Arrays.fill(blocks, (byte) 0);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(blocks);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
}

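The two ThreeBlocks classes (Packed8ThreeBlocks above, Packed16ThreeBlocks earlier in this commit) store each 24- or 48-bit value in three consecutive array slots. A plain-Java round-trip of that layout, with the test values invented:

public class ThreeBlocksLayoutDemo {
  public static void main(String[] args) {
    long value = 0x123456; // a 24-bit value split across three bytes
    byte b0 = (byte) (value >> 16), b1 = (byte) (value >> 8), b2 = (byte) value;
    long back = (b0 & 0xffL) << 16 | (b1 & 0xffL) << 8 | (b2 & 0xffL);
    System.out.println(Long.toHexString(back)); // 123456

    long wide = 0x123456789ABCL; // a 48-bit value split across three shorts
    short s0 = (short) (wide >> 32), s1 = (short) (wide >> 16), s2 = (short) wide;
    long wideBack = (s0 & 0xffffL) << 32 | (s1 & 0xffffL) << 16 | (s2 & 0xffffL);
    System.out.println(Long.toHexString(wideBack)); // 123456789abc
  }
}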

@ -23,7 +23,6 @@ import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Constants;
import java.io.IOException;
@ -38,10 +37,33 @@ import java.io.IOException;
public class PackedInts {
/**
* At most 700% memory overhead, always select a direct implementation.
*/
public static final float FASTEST = 7f;
/**
* At most 50% memory overhead, always select a reasonably fast implementation.
*/
public static final float FAST = 0.5f;
/**
* At most 20% memory overhead.
*/
public static final float DEFAULT = 0.2f;
/**
* No memory overhead at all, but the returned implementation may be slow.
*/
public static final float COMPACT = 0f;
private final static String CODEC_NAME = "PackedInts";
private final static int VERSION_START = 0;
private final static int VERSION_CURRENT = VERSION_START;
static final int PACKED = 0;
static final int PACKED_SINGLE_BLOCK = 1;
/**
* A read-only random access array of positive integers.
* @lucene.internal
@ -103,7 +125,35 @@ public class PackedInts {
* @throws IOException if reading the value throws an IOException*/
long advance(int ord) throws IOException;
}
static abstract class ReaderIteratorImpl implements ReaderIterator {
protected final IndexInput in;
protected final int bitsPerValue;
protected final int valueCount;
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
this.in = in;
this.bitsPerValue = bitsPerValue;
this.valueCount = valueCount;
}
@Override
public int getBitsPerValue() {
return bitsPerValue;
}
@Override
public int size() {
return valueCount;
}
@Override
public void close() throws IOException {
in.close();
}
}
/**
* A packed integer array that can be modified.
* @lucene.internal
@ -118,8 +168,7 @@ public class PackedInts {
/**
* Sets all values to 0.
*/
void clear();
}
@ -145,10 +194,6 @@ public class PackedInts {
return valueCount;
}
public long getMaxValue() { // Convenience method
return maxValue(bitsPerValue);
}
public Object getArray() {
return null;
}
@ -176,8 +221,10 @@ public class PackedInts {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(bitsPerValue);
out.writeVInt(valueCount);
out.writeVInt(getFormat());
}
protected abstract int getFormat();
public abstract void add(long v) throws IOException;
public abstract void finish() throws IOException;
}
@ -185,6 +232,7 @@ public class PackedInts {
/**
* Retrieve PackedInt data from the DataInput and return a packed int
* structure based on it.
*
* @param in positioned at the beginning of a stored packed int structure.
* @return a read only random access capable array of positive integers.
* @throws IOException if the structure could not be retrieved.
@ -195,22 +243,30 @@ public class PackedInts {
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (bitsPerValue) {
case 8:
return new Direct8(in, valueCount);
case 16:
return new Direct16(in, valueCount);
case 32:
return new Direct32(in, valueCount);
case 64:
return new Direct64(in, valueCount);
default:
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
return new Packed64(in, valueCount, bitsPerValue);
} else {
return new Packed32(in, valueCount, bitsPerValue);
}
switch (format) {
case PACKED:
switch (bitsPerValue) {
case 8:
return new Direct8(in, valueCount);
case 16:
return new Direct16(in, valueCount);
case 24:
return new Packed8ThreeBlocks(in, valueCount);
case 32:
return new Direct32(in, valueCount);
case 48:
return new Packed16ThreeBlocks(in, valueCount);
case 64:
return new Direct64(in, valueCount);
default:
return new Packed64(in, valueCount, bitsPerValue);
}
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
@ -226,7 +282,15 @@ public class PackedInts {
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
return new PackedReaderIterator(bitsPerValue, valueCount, in);
final int format = in.readVInt();
switch (format) {
case PACKED:
return new PackedReaderIterator(valueCount, bitsPerValue, in);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
/**
@ -243,38 +307,70 @@ public class PackedInts {
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
return new DirectReader(bitsPerValue, valueCount, in);
final int format = in.readVInt();
switch (format) {
case PACKED:
return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
/**
* Create a packed integer array with the given amount of values initialized
* to 0. the valueCount and the bitsPerValue cannot be changed after creation.
* All Mutables known by this factory are kept fully in RAM.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @return a mutable packed integer array.
*
* Positive values of <code>acceptableOverheadRatio</code> will trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} selects the most memory-efficient implementation,
* whereas {@link PackedInts#FASTEST} selects the fastest one.
*
* @param valueCount the number of elements
* @param bitsPerValue the number of bits available for any given value
* @param acceptableOverheadRatio an acceptable overhead
* ratio per value
* @return a mutable packed integer array
* @throws java.io.IOException if the Mutable could not be created. With the
* current implementations, this never happens, but the method
* signature allows for future persistence-backed Mutables.
* @lucene.internal
*/
public static Mutable getMutable(
int valueCount, int bitsPerValue) {
switch (bitsPerValue) {
case 8:
public static Mutable getMutable(int valueCount,
int bitsPerValue, float acceptableOverheadRatio) {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return new Direct8(valueCount);
case 16:
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return new Direct16(valueCount);
case 32:
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return new Direct32(valueCount);
case 64:
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return new Direct64(valueCount);
default:
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
return new Packed64(valueCount, bitsPerValue);
} else {
return new Packed32(valueCount, bitsPerValue);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return new Packed8ThreeBlocks(valueCount);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return new Packed16ThreeBlocks(valueCount);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return Packed64SingleBlock.create(valueCount, bpv);
}
}
}
return new Packed64(valueCount, bitsPerValue);
}
}
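For illustration (the value count and bit width below are made up), the acceptableOverheadRatio argument drives which of the implementations above gets picked:

    // COMPACT keeps the densest layout (here a Packed64 at 20 bits/value).
    PackedInts.Mutable compact = PackedInts.getMutable(1000, 20, PackedInts.COMPACT);
    // FASTEST allows the call to round 20 bits up to an aligned implementation
    // (Direct32, a *ThreeBlocks variant or a Packed64SingleBlock), trading RAM for speed.
    PackedInts.Mutable fast = PackedInts.getMutable(1000, 20, PackedInts.FASTEST);
    fast.set(0, 12345L);
    long x = fast.get(0);  // 12345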
@ -282,16 +378,55 @@ public class PackedInts {
* Create a packed integer array writer for the given number of values at the
* given bits/value. Writers append to the given IndexOutput and have very
* low memory overhead.
*
* Positive values of <code>acceptableOverheadRatio</code> will trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} selects the most memory-efficient implementation,
* whereas {@link PackedInts#FASTEST} selects the fastest one.
*
* @param out the destination for the produced bits.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @param acceptableOverheadRatio an acceptable overhead ratio per value
* @return a Writer ready for receiving values.
* @throws IOException if bits could not be written to out.
* @lucene.internal
*/
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue)
public static Writer getWriter(DataOutput out,
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
throws IOException {
return new PackedWriter(out, valueCount, bitsPerValue);
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return new PackedWriter(out, valueCount, 8);
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return new PackedWriter(out, valueCount, 16);
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return new PackedWriter(out, valueCount, 32);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return new PackedWriter(out, valueCount, 64);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return new PackedWriter(out, valueCount, 24);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return new PackedWriter(out, valueCount, 48);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return new Packed64SingleBlockWriter(out, valueCount, bpv);
}
}
}
return new PackedWriter(out, valueCount, bitsPerValue);
}
}
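A write-side sketch to pair with the read-back example further up (dir, values and maxObservedValue are placeholders, and the IOException these calls declare is left to the caller):

    IndexOutput out = dir.createOutput("packed.bin", IOContext.DEFAULT);
    int valueCount = values.length;
    int bitsPerValue = PackedInts.bitsRequired(maxObservedValue);
    PackedInts.Writer writer = PackedInts.getWriter(out, valueCount, bitsPerValue, PackedInts.DEFAULT);
    for (long v : values) {   // exactly valueCount values must be added
      writer.add(v);
    }
    writer.finish();          // flushes any pending bits
    out.close();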
/** Returns how many bits are required to hold values up
@ -301,14 +436,10 @@ public class PackedInts {
* @lucene.internal
*/
public static int bitsRequired(long maxValue) {
// Very high long values does not translate well to double, so we do an
// explicit check for the edge cases
if (maxValue > 0x3FFFFFFFFFFFFFFFL) {
return 63;
} if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
return 62;
if (maxValue < 0) {
throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
}
return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
}
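A few worked values for the new formula (not asserted anywhere in the patch, just a sanity check):

    PackedInts.bitsRequired(0);                // 1  (Math.max clamps to at least one bit)
    PackedInts.bitsRequired(1);                // 1
    PackedInts.bitsRequired(255);              // 8
    PackedInts.bitsRequired(256);              // 9
    PackedInts.bitsRequired(Long.MAX_VALUE);   // 63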
/**
@ -321,26 +452,4 @@ public class PackedInts {
public static long maxValue(int bitsPerValue) {
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
}
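And maxValue is the inverse bound, so the two helpers round-trip (again just worked examples):

    PackedInts.maxValue(1);    // 1
    PackedInts.maxValue(8);    // 255
    PackedInts.maxValue(64);   // Long.MAX_VALUE
    // for any v >= 0: v <= PackedInts.maxValue(PackedInts.bitsRequired(v))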
/** Rounds bitsPerValue up to 8, 16, 32 or 64. */
public static int getNextFixedSize(int bitsPerValue) {
if (bitsPerValue <= 8) {
return 8;
} else if (bitsPerValue <= 16) {
return 16;
} else if (bitsPerValue <= 32) {
return 32;
} else {
return 64;
}
}
/** Possibly wastes some storage in exchange for faster lookups */
public static int getRoundedFixedSize(int bitsPerValue) {
if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
return getNextFixedSize(bitsPerValue);
} else {
return bitsPerValue;
}
}
}

View File

@ -21,24 +21,18 @@ import org.apache.lucene.store.IndexInput;
import java.io.IOException;
final class PackedReaderIterator implements PackedInts.ReaderIterator {
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
private long pending;
private int pendingBitsLeft;
private final IndexInput in;
private final int bitsPerValue;
private final int valueCount;
private int position = -1;
// masks[n-1] masks for bottom n bits
private final long[] masks;
public PackedReaderIterator(int bitsPerValue, int valueCount, IndexInput in)
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in)
throws IOException {
super(valueCount, bitsPerValue, in);
this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue;
this.in = in;
masks = new long[bitsPerValue];
long v = 1;
@ -48,14 +42,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
}
}
public int getBitsPerValue() {
return bitsPerValue;
}
public int size() {
return valueCount;
}
public long next() throws IOException {
if (pendingBitsLeft == 0) {
pending = in.readLong();
@ -79,10 +65,6 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
return result;
}
public void close() throws IOException {
in.close();
}
public int ord() {
return position;
}

View File

@ -52,6 +52,11 @@ class PackedWriter extends PackedInts.Writer {
}
}
@Override
protected int getFormat() {
return PackedInts.PACKED;
}
/**
* Do not call this after finish
*/

View File

@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
// TODO: some of this should be under lucene40 codec tests? is it talking to the codec directly?
public class TestDocValues extends LuceneTestCase {
@ -71,7 +72,7 @@ public class TestDocValues extends LuceneTestCase {
Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter();
DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random()),
random().nextBoolean());
random().nextFloat() * PackedInts.FAST);
int maxDoc = 220;
final String[] values = new String[maxDoc];
final int fixedLength = 1 + atLeast(50);

View File

@ -64,6 +64,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
public class TestIndexWriter extends LuceneTestCase {
@ -1677,7 +1678,7 @@ public class TestIndexWriter extends LuceneTestCase {
w.close();
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextBoolean());
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
assertEquals(5, dti.numOrd()); // +1 for null ord
assertEquals(4, dti.size());
assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef()));

View File

@ -19,6 +19,7 @@ package org.apache.lucene.util.packed;
import org.apache.lucene.store.*;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.packed.PackedInts.Reader;
import java.util.ArrayList;
import java.util.List;
@ -53,10 +54,10 @@ public class TestPackedInts extends LuceneTestCase {
for(int nbits=1;nbits<63;nbits++) {
final int valueCount = 100+random().nextInt(500);
final Directory d = newDirectory();
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
PackedInts.Writer w = PackedInts.getWriter(
out, valueCount, nbits);
out, valueCount, nbits, random().nextFloat()*PackedInts.FASTEST);
final long[] values = new long[valueCount];
for(int i=0;i<valueCount;i++) {
@ -188,16 +189,24 @@ public class TestPackedInts extends LuceneTestCase {
if (bitsPerValue <= 16) {
packedInts.add(new Direct16(valueCount));
}
if (bitsPerValue <= 31) {
packedInts.add(new Packed32(valueCount, bitsPerValue));
if (bitsPerValue <= 24 && valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
packedInts.add(new Packed8ThreeBlocks(valueCount));
}
if (bitsPerValue <= 32) {
packedInts.add(new Direct32(valueCount));
}
if (bitsPerValue <= 48 && valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
packedInts.add(new Packed16ThreeBlocks(valueCount));
}
if (bitsPerValue <= 63) {
packedInts.add(new Packed64(valueCount, bitsPerValue));
}
packedInts.add(new Direct64(valueCount));
for (int bpv = bitsPerValue; bpv <= 64; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
packedInts.add(Packed64SingleBlock.create(valueCount, bpv));
}
}
return packedInts;
}
@ -242,20 +251,26 @@ public class TestPackedInts extends LuceneTestCase {
}
public void testSingleValue() throws Exception {
Directory dir = newDirectory();
IndexOutput out = dir.createOutput("out", newIOContext(random()));
PackedInts.Writer w = PackedInts.getWriter(out, 1, 8);
w.add(17);
w.finish();
final long end = out.getFilePointer();
out.close();
for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
Directory dir = newDirectory();
IndexOutput out = dir.createOutput("out", newIOContext(random()));
PackedInts.Writer w = PackedInts.getWriter(out, 1, bitsPerValue, PackedInts.DEFAULT);
long value = 17L & PackedInts.maxValue(bitsPerValue);
w.add(value);
w.finish();
final long end = out.getFilePointer();
out.close();
IndexInput in = dir.openInput("out", newIOContext(random()));
PackedInts.getReader(in);
assertEquals(end, in.getFilePointer());
in.close();
IndexInput in = dir.openInput("out", newIOContext(random()));
Reader reader = PackedInts.getReader(in);
String msg = "Impl=" + w.getClass().getSimpleName() + ", bitsPerValue=" + bitsPerValue;
assertEquals(msg, 1, reader.size());
assertEquals(msg, value, reader.get(0));
assertEquals(msg, end, in.getFilePointer());
in.close();
dir.close();
dir.close();
}
}
public void testSecondaryBlockChange() throws IOException {
@ -276,15 +291,36 @@ public class TestPackedInts extends LuceneTestCase {
int INDEX = (int)Math.pow(2, 30)+1;
int BITS = 2;
Packed32 p32 = new Packed32(INDEX, BITS);
p32.set(INDEX-1, 1);
assertEquals("The value at position " + (INDEX-1)
+ " should be correct for Packed32", 1, p32.get(INDEX-1));
p32 = null; // To free the 256MB used
Packed64 p64 = new Packed64(INDEX, BITS);
p64.set(INDEX-1, 1);
assertEquals("The value at position " + (INDEX-1)
+ " should be correct for Packed64", 1, p64.get(INDEX-1));
p64 = null;
for (int bits = 1; bits <=64; ++bits) {
if (Packed64SingleBlock.isSupported(bits)) {
int index = Integer.MAX_VALUE / bits + (bits == 1 ? 0 : 1);
Packed64SingleBlock p64sb = Packed64SingleBlock.create(index, bits);
p64sb.set(index - 1, 1);
assertEquals("The value at position " + (index-1)
+ " should be correct for " + p64sb.getClass().getSimpleName(),
1, p64sb.get(index-1));
}
}
int index = Integer.MAX_VALUE / 24 + 1;
Packed8ThreeBlocks p8 = new Packed8ThreeBlocks(index);
p8.set(index - 1, 1);
assertEquals("The value at position " + (index-1)
+ " should be correct for Packed8ThreeBlocks", 1, p8.get(index-1));
p8 = null;
index = Integer.MAX_VALUE / 48 + 1;
Packed16ThreeBlocks p16 = new Packed16ThreeBlocks(index);
p16.set(index - 1, 1);
assertEquals("The value at position " + (index-1)
+ " should be correct for Packed16ThreeBlocks", 1, p16.get(index-1));
p16 = null;
}
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.packed.PackedInts;
/**
* Use a field value and find the Document Frequency within another field.
@ -52,7 +53,7 @@ public class JoinDocFreqValueSource extends FieldCacheSource {
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
{
final DocTerms terms = cache.getTerms(readerContext.reader(), field, true );
final DocTerms terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST);
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
return new IntDocValues(this) {