mirror of https://github.com/apache/lucene.git
LUCENE-5720: Optimize DirectPackedReader's decompression
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1599180 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6ff06b0856
commit
e83229fd20
|
@ -219,6 +219,8 @@ Optimizations
|
|||
* LUCENE-5694: Don't score() subscorers in DisjunctionSumScorer or
|
||||
DisjunctionMaxScorer unless score() is called. (Robert Muir)
|
||||
|
||||
* LUCENE-5720: Optimize DirectPackedReader's decompression. (Robert Muir)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-5673: MMapDirectory: Work around a "bug" in the JDK that throws
|
||||
|
|
|
@ -182,7 +182,7 @@ class MemoryDocValuesConsumer extends DocValuesConsumer {
|
|||
data.writeLong(gcd);
|
||||
data.writeVInt(BLOCK_SIZE);
|
||||
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.DEFAULT);
|
||||
for (Number nv : values) {
|
||||
long value = nv == null ? 0 : nv.longValue();
|
||||
writer.add((value - minValue) / gcd);
|
||||
|
@ -194,7 +194,7 @@ class MemoryDocValuesConsumer extends DocValuesConsumer {
|
|||
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
||||
data.writeVInt(BLOCK_SIZE);
|
||||
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.DEFAULT);
|
||||
for (Number nv : values) {
|
||||
writer.add(nv == null ? 0 : nv.longValue());
|
||||
}
|
||||
|
|
|
@ -241,7 +241,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
|
|||
|
||||
vectorsStream.writeVInt(PackedInts.VERSION_CURRENT);
|
||||
vectorsStream.writeVInt(chunkSize);
|
||||
writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);
|
||||
writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE, PackedInts.COMPACT);
|
||||
|
||||
positionsBuf = new int[1024];
|
||||
startOffsetsBuf = new int[1024];
|
||||
|
|
|
@ -153,7 +153,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
|
|||
data.writeLong(gcd);
|
||||
data.writeVInt(BLOCK_SIZE);
|
||||
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.DEFAULT);
|
||||
for (Number nv : values) {
|
||||
long value = nv == null ? 0 : nv.longValue();
|
||||
writer.add((value - minValue) / gcd);
|
||||
|
@ -165,7 +165,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
|
|||
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
||||
data.writeVInt(BLOCK_SIZE);
|
||||
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.DEFAULT);
|
||||
for (Number nv : values) {
|
||||
writer.add(nv == null ? 0 : nv.longValue());
|
||||
}
|
||||
|
|
|
@ -51,6 +51,8 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
public static final int GCD_COMPRESSED = 1;
|
||||
/** Compressed by giving IDs to unique values. */
|
||||
public static final int TABLE_COMPRESSED = 2;
|
||||
/** Compressed using just bitpacked integers */
|
||||
public static final int BITPACK_COMPRESSED = 3;
|
||||
|
||||
/** Uncompressed binary, written directly (fixed length). */
|
||||
public static final int BINARY_FIXED_UNCOMPRESSED = 0;
|
||||
|
@ -99,6 +101,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
long maxValue = Long.MIN_VALUE;
|
||||
long gcd = 0;
|
||||
boolean missing = false;
|
||||
boolean block = true;
|
||||
// TODO: more efficient?
|
||||
HashSet<Long> uniqueValues = null;
|
||||
if (optimizeStorage) {
|
||||
|
@ -138,9 +141,19 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
++count;
|
||||
}
|
||||
} else {
|
||||
for (@SuppressWarnings("unused") Number nv : values) {
|
||||
for (Number nv : values) {
|
||||
long value = nv.longValue();
|
||||
assert value >= -1;
|
||||
minValue = Math.min(minValue, value);
|
||||
maxValue = Math.max(maxValue, value);
|
||||
++count;
|
||||
}
|
||||
|
||||
// packed ints doesnt support valueCount > maxValue, and
|
||||
// we must represent missing ordinal (-1)
|
||||
if (count < Integer.MAX_VALUE && maxValue < Long.MAX_VALUE) {
|
||||
block = false;
|
||||
}
|
||||
}
|
||||
|
||||
final long delta = maxValue - minValue;
|
||||
|
@ -152,6 +165,8 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
format = TABLE_COMPRESSED;
|
||||
} else if (gcd != 0 && gcd != 1) {
|
||||
format = GCD_COMPRESSED;
|
||||
} else if (block == false) {
|
||||
format = BITPACK_COMPRESSED;
|
||||
} else {
|
||||
format = DELTA_COMPRESSED;
|
||||
}
|
||||
|
@ -173,7 +188,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
case GCD_COMPRESSED:
|
||||
meta.writeLong(minValue);
|
||||
meta.writeLong(gcd);
|
||||
final BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.DEFAULT);
|
||||
for (Number nv : values) {
|
||||
long value = nv == null ? 0 : nv.longValue();
|
||||
quotientWriter.add((value - minValue) / gcd);
|
||||
|
@ -181,7 +196,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
quotientWriter.finish();
|
||||
break;
|
||||
case DELTA_COMPRESSED:
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.DEFAULT);
|
||||
for (Number nv : values) {
|
||||
writer.add(nv == null ? 0 : nv.longValue());
|
||||
}
|
||||
|
@ -202,6 +217,18 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
|
|||
}
|
||||
ordsWriter.finish();
|
||||
break;
|
||||
case BITPACK_COMPRESSED:
|
||||
assert count > 0 && count < Integer.MAX_VALUE;
|
||||
assert maxValue >= -1 && maxValue < Long.MAX_VALUE : maxValue;
|
||||
int bpv = PackedInts.bitsRequired(maxValue+1);
|
||||
bpv = PackedInts.fastestDirectBits(bpv, PackedInts.DEFAULT);
|
||||
meta.writeVInt(bpv);
|
||||
final PackedInts.Writer bitWriter = PackedInts.getWriterNoHeader(data, PackedInts.Format.PACKED, (int) count, bpv, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||
for (Number nv : values) {
|
||||
bitWriter.add(nv.longValue()+1);
|
||||
}
|
||||
bitWriter.finish();
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
|
|
@ -50,6 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* to this table, and those ordinals are compressed with bitpacking ({@link PackedInts}).
|
||||
* <li>GCD-compressed: when all numbers share a common divisor, such as dates, the greatest
|
||||
* common denominator (GCD) is computed, and quotients are stored using Delta-compressed Numerics.
|
||||
* <li>Bitpack-compressed: per-document integers written as a block for the entire segment.
|
||||
* This technique will only be used when numbers range from {@code -1 .. Long.MAX_VALUE-1},
|
||||
* when the blocking for the delta-compressed method would not provide additional compression.
|
||||
* </ul>
|
||||
* <p>
|
||||
* {@link DocValuesType#BINARY BINARY}:
|
||||
|
@ -96,6 +99,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* <li>GCDNumericEntry --> NumericHeader,MinValue,GCD</li>
|
||||
* <li>TableNumericEntry --> NumericHeader,TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup></li>
|
||||
* <li>DeltaNumericEntry --> NumericHeader</li>
|
||||
* <li>DeltaNumericEntry --> NumericHeader,BitsPerValue</li>
|
||||
* <li>NumericHeader --> FieldNumber,EntryType,NumericType,MissingOffset,PackedVersion,DataOffset,Count,BlockSize</li>
|
||||
* <li>BinaryEntry --> FixedBinaryEntry | VariableBinaryEntry | PrefixBinaryEntry</li>
|
||||
* <li>FixedBinaryEntry --> BinaryHeader</li>
|
||||
|
@ -108,7 +112,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
|
||||
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --> {@link DataOutput#writeLong Int64}</li>
|
||||
* <li>TableSize --> {@link DataOutput#writeVInt vInt}</li>
|
||||
* <li>BitsPerValue,TableSize --> {@link DataOutput#writeVInt vInt}</li>
|
||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||
* </ul>
|
||||
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
|
||||
|
@ -122,15 +126,17 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* <ul>
|
||||
* <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
|
||||
* from the minimum value within the block.
|
||||
* <li>1 -->, gcd-compressed. When all integers share a common divisor, only quotients are stored
|
||||
* <li>1 --> gcd-compressed. When all integers share a common divisor, only quotients are stored
|
||||
* using blocks of delta-encoded ints.
|
||||
* <li>2 --> table-compressed. When the number of unique numeric values is small and it would save space,
|
||||
* a lookup table of unique values is written, followed by the ordinal for each document.
|
||||
* <li>3 --> bitpack-compressed. When the delta method would not save space, every integer is
|
||||
* delta encoded from {@code -1} for the entire segment.
|
||||
* </ul>
|
||||
* <p>BinaryType indicates how Binary values will be stored:
|
||||
* <ul>
|
||||
* <li>0 --> fixed-width. All values have the same length, addressing by multiplication.
|
||||
* <li>1 -->, variable-width. An address for each value is stored.
|
||||
* <li>1 --> variable-width. An address for each value is stored.
|
||||
* <li>2 --> prefix-compressed. An address to the start of every interval'th value is stored.
|
||||
* </ul>
|
||||
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
|
||||
|
@ -185,7 +191,8 @@ public final class Lucene45DocValuesFormat extends DocValuesFormat {
|
|||
static final int VERSION_START = 0;
|
||||
static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1;
|
||||
static final int VERSION_CHECKSUM = 2;
|
||||
static final int VERSION_CURRENT = VERSION_CHECKSUM;
|
||||
static final int VERSION_BITPACK_COMPRESSED = 3;
|
||||
static final int VERSION_CURRENT = VERSION_BITPACK_COMPRESSED;
|
||||
static final byte NUMERIC = 0;
|
||||
static final byte BINARY = 1;
|
||||
static final byte SORTED = 2;
|
||||
|
|
|
@ -25,6 +25,7 @@ import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_CO
|
|||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED;
|
||||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES;
|
||||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED;
|
||||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BITPACK_COMPRESSED;
|
||||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat.VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED;
|
||||
|
||||
import java.io.Closeable; // javadocs
|
||||
|
@ -264,6 +265,9 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
entry.table[i] = meta.readLong();
|
||||
}
|
||||
break;
|
||||
case BITPACK_COMPRESSED:
|
||||
entry.bitsRequired = meta.readVInt();
|
||||
break;
|
||||
case DELTA_COMPRESSED:
|
||||
break;
|
||||
default:
|
||||
|
@ -339,6 +343,14 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
case DELTA_COMPRESSED:
|
||||
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
|
||||
return reader;
|
||||
case BITPACK_COMPRESSED:
|
||||
final PackedInts.Reader bits = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, entry.bitsRequired);
|
||||
return new LongValues() {
|
||||
@Override
|
||||
public long get(long id) {
|
||||
return bits.get((int) id) - 1;
|
||||
}
|
||||
};
|
||||
case GCD_COMPRESSED:
|
||||
final long min = entry.minValue;
|
||||
final long mult = entry.gcd;
|
||||
|
@ -484,10 +496,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
final int valueCount = (int) binaries.get(field.number).count;
|
||||
final BinaryDocValues binary = getBinary(field);
|
||||
NumericEntry entry = ords.get(field.number);
|
||||
IndexInput data = this.data.clone();
|
||||
data.seek(entry.offset);
|
||||
final BlockPackedReader ordinals = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
|
||||
|
||||
final LongValues ordinals = getNumeric(entry);
|
||||
return new SortedDocValues() {
|
||||
|
||||
@Override
|
||||
|
@ -686,6 +695,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
/** packed ints blocksize */
|
||||
public int blockSize;
|
||||
|
||||
int bitsRequired;
|
||||
|
||||
long minValue;
|
||||
long gcd;
|
||||
long table[];
|
||||
|
|
|
@ -58,13 +58,16 @@ import org.apache.lucene.store.DataOutput;
|
|||
* @lucene.internal
|
||||
*/
|
||||
public final class BlockPackedWriter extends AbstractBlockPackedWriter {
|
||||
|
||||
final float acceptableOverheadRatio;
|
||||
|
||||
/**
|
||||
* Sole constructor.
|
||||
* @param blockSize the number of values of a single block, must be a power of 2
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||
*/
|
||||
public BlockPackedWriter(DataOutput out, int blockSize) {
|
||||
public BlockPackedWriter(DataOutput out, int blockSize, float acceptableOverheadRatio) {
|
||||
super(out, blockSize);
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
}
|
||||
|
||||
protected void flush() throws IOException {
|
||||
|
@ -76,7 +79,8 @@ public final class BlockPackedWriter extends AbstractBlockPackedWriter {
|
|||
}
|
||||
|
||||
final long delta = max - min;
|
||||
final int bitsRequired = delta < 0 ? 64 : delta == 0L ? 0 : PackedInts.bitsRequired(delta);
|
||||
int bitsRequired = delta < 0 ? 64 : delta == 0L ? 0 : PackedInts.bitsRequired(delta);
|
||||
bitsRequired = PackedInts.fastestDirectBits(bitsRequired, acceptableOverheadRatio);
|
||||
if (bitsRequired == 64) {
|
||||
// no need to delta-encode
|
||||
min = 0L;
|
||||
|
|
|
@ -23,11 +23,11 @@ import java.io.IOException;
|
|||
|
||||
/* Reads directly from disk on each get */
|
||||
class DirectPackedReader extends PackedInts.ReaderImpl {
|
||||
private final IndexInput in;
|
||||
private final long startPointer;
|
||||
private final long valueMask;
|
||||
final IndexInput in;
|
||||
final long startPointer;
|
||||
final long valueMask;
|
||||
|
||||
public DirectPackedReader(int bitsPerValue, int valueCount, IndexInput in) {
|
||||
DirectPackedReader(int bitsPerValue, int valueCount, IndexInput in) {
|
||||
super(valueCount, bitsPerValue);
|
||||
this.in = in;
|
||||
|
||||
|
@ -90,7 +90,7 @@ class DirectPackedReader extends PackedInts.ReaderImpl {
|
|||
return (rawValue >>> shiftRightBits) & valueMask;
|
||||
|
||||
} catch (IOException ioe) {
|
||||
throw new IllegalStateException("failed", ioe);
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -98,4 +98,258 @@ class DirectPackedReader extends PackedInts.ReaderImpl {
|
|||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static class DirectPackedReader1 extends DirectPackedReader {
|
||||
DirectPackedReader1(int valueCount, IndexInput in) {
|
||||
super(1, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index >>> 3));
|
||||
int shift = 7 - (index & 7);
|
||||
return (in.readByte() >>> shift) & 0x1;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader2 extends DirectPackedReader {
|
||||
DirectPackedReader2(int valueCount, IndexInput in) {
|
||||
super(2, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index >>> 2));
|
||||
int shift = (3 - (index & 3)) << 1;
|
||||
return (in.readByte() >>> shift) & 0x3;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader4 extends DirectPackedReader {
|
||||
DirectPackedReader4(int valueCount, IndexInput in) {
|
||||
super(4, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index >>> 1));
|
||||
int shift = ((index + 1) & 1) << 2;
|
||||
return (in.readByte() >>> shift) & 0xF;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader8 extends DirectPackedReader {
|
||||
DirectPackedReader8(int valueCount, IndexInput in) {
|
||||
super(8, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + index);
|
||||
return in.readByte() & 0xFF;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader12 extends DirectPackedReader {
|
||||
DirectPackedReader12(int valueCount, IndexInput in) {
|
||||
super(12, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
long offset = (index * 12L) >>> 3;
|
||||
in.seek(startPointer + offset);
|
||||
int shift = ((index + 1) & 1) << 2;
|
||||
return (in.readShort() >>> shift) & 0xFFF;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader16 extends DirectPackedReader {
|
||||
DirectPackedReader16(int valueCount, IndexInput in) {
|
||||
super(16, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index<<1));
|
||||
return in.readShort() & 0xFFFF;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader20 extends DirectPackedReader {
|
||||
DirectPackedReader20(int valueCount, IndexInput in) {
|
||||
super(20, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
long offset = (index * 20L) >>> 3;
|
||||
in.seek(startPointer + offset);
|
||||
int v = in.readShort() << 8 | (in.readByte() & 0xFF);
|
||||
int shift = ((index + 1) & 1) << 2;
|
||||
return (v >>> shift) & 0xFFFFF;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader24 extends DirectPackedReader {
|
||||
DirectPackedReader24(int valueCount, IndexInput in) {
|
||||
super(24, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index*3));
|
||||
return (in.readShort() & 0xFFFF) << 8 | (in.readByte() & 0xFF);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader28 extends DirectPackedReader {
|
||||
DirectPackedReader28(int valueCount, IndexInput in) {
|
||||
super(28, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
long offset = (index * 28L) >>> 3;
|
||||
in.seek(startPointer + offset);
|
||||
int shift = ((index + 1) & 1) << 2;
|
||||
return (in.readInt() >>> shift) & 0xFFFFFFFL;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader32 extends DirectPackedReader {
|
||||
DirectPackedReader32(int valueCount, IndexInput in) {
|
||||
super(32, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index<<2));
|
||||
return in.readInt() & 0xFFFFFFFFL;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader40 extends DirectPackedReader {
|
||||
DirectPackedReader40(int valueCount, IndexInput in) {
|
||||
super(40, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index*5));
|
||||
return (in.readInt() & 0xFFFFFFFFL) << 8 | (in.readByte() & 0xFF);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader48 extends DirectPackedReader {
|
||||
DirectPackedReader48(int valueCount, IndexInput in) {
|
||||
super(48, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index*6));
|
||||
return (in.readInt() & 0xFFFFFFFFL) << 16 | (in.readShort() & 0xFFFF);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader56 extends DirectPackedReader {
|
||||
DirectPackedReader56(int valueCount, IndexInput in) {
|
||||
super(56, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index*7));
|
||||
return (in.readInt() & 0xFFFFFFFFL) << 24 | (in.readShort() & 0xFFFF) << 8 | (in.readByte() & 0xFF);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DirectPackedReader64 extends DirectPackedReader {
|
||||
DirectPackedReader64(int valueCount, IndexInput in) {
|
||||
super(64, valueCount, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
try {
|
||||
in.seek(startPointer + (index<<3));
|
||||
return in.readLong();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static DirectPackedReader getInstance(int bitsPerValue, int valueCount, IndexInput in) {
|
||||
switch(bitsPerValue) {
|
||||
case 1: return new DirectPackedReader1(valueCount, in);
|
||||
case 2: return new DirectPackedReader2(valueCount, in);
|
||||
case 4: return new DirectPackedReader4(valueCount, in);
|
||||
case 8: return new DirectPackedReader8(valueCount, in);
|
||||
case 12: return new DirectPackedReader12(valueCount, in);
|
||||
case 16: return new DirectPackedReader16(valueCount, in);
|
||||
case 20: return new DirectPackedReader20(valueCount, in);
|
||||
case 24: return new DirectPackedReader24(valueCount, in);
|
||||
case 28: return new DirectPackedReader28(valueCount, in);
|
||||
case 32: return new DirectPackedReader32(valueCount, in);
|
||||
case 40: return new DirectPackedReader40(valueCount, in);
|
||||
case 48: return new DirectPackedReader48(valueCount, in);
|
||||
case 56: return new DirectPackedReader56(valueCount, in);
|
||||
case 64: return new DirectPackedReader64(valueCount, in);
|
||||
default: return new DirectPackedReader(bitsPerValue, valueCount, in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,9 +50,9 @@ public class PackedInts {
|
|||
public static final float FAST = 0.5f;
|
||||
|
||||
/**
|
||||
* At most 20% memory overhead.
|
||||
* At most 25% memory overhead.
|
||||
*/
|
||||
public static final float DEFAULT = 0.2f;
|
||||
public static final float DEFAULT = 0.25f;
|
||||
|
||||
/**
|
||||
* No memory overhead at all, but the returned implementation may be slow.
|
||||
|
@ -282,6 +282,39 @@ public class PackedInts {
|
|||
|
||||
return new FormatAndBits(format, actualBitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to find the number of bits per value that would
|
||||
* read from disk the fastest reader whose overhead is less than
|
||||
* <code>acceptableOverheadRatio</code>.
|
||||
* </p><p>
|
||||
* The <code>acceptableOverheadRatio</code> parameter makes sense for
|
||||
* random-access {@link Reader}s. In case you only plan to perform
|
||||
* sequential access on this stream later on, you should probably use
|
||||
* {@link PackedInts#COMPACT}.
|
||||
* </p><p>
|
||||
*/
|
||||
public static int fastestDirectBits(int bitsPerValue, float acceptableOverheadRatio) {
|
||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||
|
||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||
|
||||
// first see if we can upgrade to byte
|
||||
int byteAlign = (bitsPerValue + 7) & 0xF8;
|
||||
if (byteAlign <= maxBitsPerValue) {
|
||||
return byteAlign;
|
||||
}
|
||||
|
||||
// otherwise try to upgrade to a nibble boundary (for numbers < 32)
|
||||
int nibbleAlign = (bitsPerValue + 3) & 0xFC;
|
||||
if (bitsPerValue < 32 && nibbleAlign <= maxBitsPerValue) {
|
||||
return nibbleAlign;
|
||||
}
|
||||
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* A decoder for packed integers.
|
||||
|
@ -964,7 +997,7 @@ public class PackedInts {
|
|||
}
|
||||
};
|
||||
} else {
|
||||
return new DirectPackedReader(bitsPerValue, valueCount, in);
|
||||
return DirectPackedReader.getInstance(bitsPerValue, valueCount, in);
|
||||
}
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
|
||||
|
|
|
@ -1123,7 +1123,7 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
|
||||
final Directory dir = newDirectory();
|
||||
final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(out, blockSize);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(out, blockSize, PackedInts.COMPACT);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
assertEquals(i, writer.ord());
|
||||
writer.add(values[i]);
|
||||
|
@ -1247,7 +1247,7 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
final int blockSize = 1 << TestUtil.nextInt(random(), 20, 22);
|
||||
final Directory dir = newDirectory();
|
||||
final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(out, blockSize);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(out, blockSize, PackedInts.COMPACT);
|
||||
long value = random().nextInt() & 0xFFFFFFFFL;
|
||||
long valueOffset = TestUtil.nextLong(random(), 0, valueCount - 1);
|
||||
for (long i = 0; i < valueCount; ) {
|
||||
|
|
|
@ -169,7 +169,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer {
|
|||
data.writeLong(gcd);
|
||||
data.writeVInt(BLOCK_SIZE);
|
||||
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.COMPACT);
|
||||
for (Number nv : values) {
|
||||
long value = nv == null ? 0 : nv.longValue();
|
||||
writer.add((value - minValue) / gcd);
|
||||
|
@ -181,7 +181,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer {
|
|||
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
||||
data.writeVInt(BLOCK_SIZE);
|
||||
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
|
||||
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE, PackedInts.COMPACT);
|
||||
for (Number nv : values) {
|
||||
writer.add(nv == null ? 0 : nv.longValue());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue