mirror of
https://github.com/apache/lucene.git
synced 2025-02-18 07:55:29 +00:00
LUCENE-3892: add the all-values-same case back.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1370710 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0db24114ec
commit
482bd77c51
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.blockpacked;
|
|||||||
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
|
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
@ -29,7 +30,12 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||||||
* which is determined by the max value in this block.
|
* which is determined by the max value in this block.
|
||||||
*/
|
*/
|
||||||
public class ForUtil {
|
public class ForUtil {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Special number of bits per value used whenever all values to encode are equal.
|
||||||
|
*/
|
||||||
|
private static final int ALL_VALUES_EQUAL = 0;
|
||||||
|
|
||||||
static final int PACKED_INTS_VERSION = 0; // nocommit: encode in the stream?
|
static final int PACKED_INTS_VERSION = 0; // nocommit: encode in the stream?
|
||||||
static final PackedInts.Encoder[] ENCODERS = new PackedInts.Encoder[33];
|
static final PackedInts.Encoder[] ENCODERS = new PackedInts.Encoder[33];
|
||||||
static final PackedInts.Decoder[] DECODERS = new PackedInts.Decoder[33];
|
static final PackedInts.Decoder[] DECODERS = new PackedInts.Decoder[33];
|
||||||
@ -53,19 +59,23 @@ public class ForUtil {
|
|||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
static void writeBlock(long[] data, byte[] encoded, IndexOutput out) throws IOException {
|
static void writeBlock(long[] data, byte[] encoded, IndexOutput out) throws IOException {
|
||||||
|
if (isAllEqual(data)) {
|
||||||
|
out.writeVInt(ALL_VALUES_EQUAL);
|
||||||
|
out.writeInt((int) data[0]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
final int numBits = bitsRequired(data);
|
final int numBits = bitsRequired(data);
|
||||||
assert numBits > 0 && numBits <= 32 : numBits;
|
assert numBits > 0 && numBits <= 32 : numBits;
|
||||||
final PackedInts.Encoder encoder = ENCODERS[numBits];
|
final PackedInts.Encoder encoder = ENCODERS[numBits];
|
||||||
final int iters = ITERATIONS[numBits];
|
final int iters = ITERATIONS[numBits];
|
||||||
assert iters * encoder.valueCount() == BlockPackedPostingsFormat.BLOCK_SIZE;
|
assert iters * encoder.valueCount() == BlockPackedPostingsFormat.BLOCK_SIZE;
|
||||||
final int encodedSize = encoder.blockCount() * iters; // number of 64-bits blocks
|
final int encodedSize = encodedSize(numBits);
|
||||||
assert encodedSize > 0 && encodedSize <= BLOCK_SIZE / 2 : encodedSize;
|
|
||||||
|
|
||||||
out.writeByte((byte) numBits);
|
out.writeVInt(numBits);
|
||||||
out.writeByte((byte) encodedSize);
|
|
||||||
|
|
||||||
encoder.encode(data, 0, encoded, 0, iters);
|
encoder.encode(data, 0, encoded, 0, iters);
|
||||||
out.writeBytes(encoded, encodedSize << 3);
|
out.writeBytes(encoded, encodedSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -77,17 +87,22 @@ public class ForUtil {
|
|||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
static void readBlock(IndexInput in, byte[] encoded, long[] decoded) throws IOException {
|
static void readBlock(IndexInput in, byte[] encoded, long[] decoded) throws IOException {
|
||||||
final int numBits = in.readByte(); // no mask because should be <= 32
|
final int numBits = in.readVInt();
|
||||||
final int encodedSize = in.readByte(); // no mask because should be <= 64
|
assert numBits <= 32 : numBits;
|
||||||
assert numBits > 0 && numBits <= 32 : numBits;
|
|
||||||
assert encodedSize > 0 && encodedSize <= BLOCK_SIZE / 2 : encodedSize; // because blocks are 64-bits and decoded values are 32-bits at most
|
|
||||||
|
|
||||||
in.readBytes(encoded, 0, encodedSize << 3);
|
if (numBits == ALL_VALUES_EQUAL) {
|
||||||
|
final int value = in.readInt();
|
||||||
|
Arrays.fill(decoded, value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int encodedSize = encodedSize(numBits);
|
||||||
|
in.readBytes(encoded, 0, encodedSize);
|
||||||
|
|
||||||
final PackedInts.Decoder decoder = DECODERS[numBits];
|
final PackedInts.Decoder decoder = DECODERS[numBits];
|
||||||
final int iters = ITERATIONS[numBits];
|
final int iters = ITERATIONS[numBits];
|
||||||
assert iters * decoder.valueCount() == BLOCK_SIZE;
|
assert iters * decoder.valueCount() == BLOCK_SIZE;
|
||||||
assert iters * decoder.blockCount() == encodedSize;
|
assert 8 * iters * decoder.blockCount() == encodedSize;
|
||||||
|
|
||||||
decoder.decode(encoded, 0, decoded, 0, iters);
|
decoder.decode(encoded, 0, decoded, 0, iters);
|
||||||
}
|
}
|
||||||
@ -99,18 +114,17 @@ public class ForUtil {
|
|||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
static void skipBlock(IndexInput in) throws IOException {
|
static void skipBlock(IndexInput in) throws IOException {
|
||||||
// see readBlock for comments
|
final int numBits = in.readVInt();
|
||||||
final int numBits = in.readByte();
|
|
||||||
final int encodedSize = in.readByte();
|
|
||||||
assert numBits > 0 && numBits <= 32 : numBits;
|
assert numBits > 0 && numBits <= 32 : numBits;
|
||||||
assert encodedSize > 0 && encodedSize <= BLOCK_SIZE / 2 : encodedSize;
|
final int encodedSize = encodedSize(numBits);
|
||||||
in.seek(in.getFilePointer() + (encodedSize << 3));
|
in.seek(in.getFilePointer() + encodedSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read values that have been written using variable-length encoding instead of bit-packing.
|
* Read values that have been written using variable-length encoding instead of bit-packing.
|
||||||
*/
|
*/
|
||||||
static void readVIntBlock(IndexInput docIn, long[] docBuffer, long[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
|
static void readVIntBlock(IndexInput docIn, long[] docBuffer,
|
||||||
|
long[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
|
||||||
if (indexHasFreq) {
|
if (indexHasFreq) {
|
||||||
for(int i=0;i<num;i++) {
|
for(int i=0;i<num;i++) {
|
||||||
final int code = docIn.readVInt();
|
final int code = docIn.readVInt();
|
||||||
@ -128,8 +142,20 @@ public class ForUtil {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nocommit: we must have a util function for this, hmm?
|
||||||
|
private static boolean isAllEqual(final long[] data) {
|
||||||
|
final long v = data[0];
|
||||||
|
for (int i = 1; i < data.length; ++i) {
|
||||||
|
if (data[i] != v) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the number of bits required to serialize any of the longs in <code>data</code>.
|
* Compute the number of bits required to serialize any of the longs in
|
||||||
|
* <code>data</code>.
|
||||||
*/
|
*/
|
||||||
private static int bitsRequired(final long[] data) {
|
private static int bitsRequired(final long[] data) {
|
||||||
long or = 0;
|
long or = 0;
|
||||||
@ -139,4 +165,12 @@ public class ForUtil {
|
|||||||
return PackedInts.bitsRequired(or);
|
return PackedInts.bitsRequired(or);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the number of bytes required to encode a block of values that require
|
||||||
|
* <code>bitsPerValue</code> bits per value.
|
||||||
|
*/
|
||||||
|
private static int encodedSize(int bitsPerValue) {
|
||||||
|
return (BLOCK_SIZE * bitsPerValue) >>> 3;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user