mirror of https://github.com/apache/lucene.git
LUCENE-3892: get numBytes & bit width into single int header
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1362013 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2c0f1e2a4f
commit
9251168dfd
|
@ -39,11 +39,7 @@ import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
|
||||||
|
|
||||||
public final class ForFactory extends IntStreamFactory {
|
public final class ForFactory extends IntStreamFactory {
|
||||||
|
|
||||||
/* number of ints for each block */
|
|
||||||
private final int blockSize;
|
|
||||||
|
|
||||||
public ForFactory() {
|
public ForFactory() {
|
||||||
this.blockSize = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -51,7 +47,7 @@ public final class ForFactory extends IntStreamFactory {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
IndexOutput out = dir.createOutput(fileName, context);
|
IndexOutput out = dir.createOutput(fileName, context);
|
||||||
try {
|
try {
|
||||||
IntIndexOutput ret = new ForIndexOutput(out, blockSize);
|
IntIndexOutput ret = new ForIndexOutput(out);
|
||||||
success = true;
|
success = true;
|
||||||
return ret;
|
return ret;
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -85,10 +81,9 @@ public final class ForFactory extends IntStreamFactory {
|
||||||
private final IntBuffer encodedBuffer;
|
private final IntBuffer encodedBuffer;
|
||||||
|
|
||||||
ForBlockReader(final IndexInput in, final int[] buffer) {
|
ForBlockReader(final IndexInput in, final int[] buffer) {
|
||||||
// upperbound for encoded value should include:
|
// upperbound for encoded value should include(here header is not buffered):
|
||||||
// 1. blockSize of normal value when numFrameBits=32(4x bytes);
|
// blockSize of normal value when numFrameBits=32(4x bytes);
|
||||||
// 2. header (4bytes);
|
this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
|
||||||
this.encoded = new byte[blockSize*4+4];
|
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.buffer = buffer;
|
this.buffer = buffer;
|
||||||
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
|
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
|
||||||
|
@ -97,10 +92,11 @@ public final class ForFactory extends IntStreamFactory {
|
||||||
// TODO: implement public void skipBlock() {} ?
|
// TODO: implement public void skipBlock() {} ?
|
||||||
@Override
|
@Override
|
||||||
public void readBlock() throws IOException {
|
public void readBlock() throws IOException {
|
||||||
final int numBytes = in.readInt();
|
final int header = in.readInt();
|
||||||
assert numBytes <= blockSize*4+4;
|
final int numBytes = ForUtil.getEncodedSize(header);
|
||||||
|
assert numBytes <= ForPostingsFormat.DEFAULT_BLOCK_SIZE*4;
|
||||||
in.readBytes(encoded,0,numBytes);
|
in.readBytes(encoded,0,numBytes);
|
||||||
ForUtil.decompress(encodedBuffer,buffer);
|
ForUtil.decompress(encodedBuffer,buffer,header);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,16 +110,17 @@ public final class ForFactory extends IntStreamFactory {
|
||||||
private final byte[] encoded;
|
private final byte[] encoded;
|
||||||
private final IntBuffer encodedBuffer;
|
private final IntBuffer encodedBuffer;
|
||||||
|
|
||||||
ForIndexOutput(IndexOutput out, int blockSize) throws IOException {
|
ForIndexOutput(IndexOutput out) throws IOException {
|
||||||
super(out,blockSize);
|
super(out,ForPostingsFormat.DEFAULT_BLOCK_SIZE);
|
||||||
this.encoded = new byte[blockSize*4+4];
|
this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
|
||||||
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
|
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void flushBlock() throws IOException {
|
protected void flushBlock() throws IOException {
|
||||||
final int numBytes = ForUtil.compress(buffer,buffer.length,encodedBuffer);
|
final int header = ForUtil.compress(buffer,encodedBuffer);
|
||||||
out.writeInt(numBytes);
|
final int numBytes = ForUtil.getEncodedSize(header);
|
||||||
|
out.writeInt(header);
|
||||||
out.writeBytes(encoded, numBytes);
|
out.writeBytes(encoded, numBytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,48 +17,46 @@ package org.apache.lucene.codecs.pfor;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
|
||||||
import org.apache.lucene.codecs.FieldsConsumer;
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
|
||||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
|
||||||
import org.apache.lucene.codecs.TermsIndexReaderBase;
|
|
||||||
import org.apache.lucene.codecs.TermsIndexWriterBase;
|
|
||||||
import org.apache.lucene.codecs.FixedGapTermsIndexReader;
|
import org.apache.lucene.codecs.FixedGapTermsIndexReader;
|
||||||
import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
|
import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
|
||||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||||
|
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||||
|
import org.apache.lucene.codecs.TermsIndexReaderBase;
|
||||||
|
import org.apache.lucene.codecs.TermsIndexWriterBase;
|
||||||
import org.apache.lucene.codecs.sep.SepPostingsReader;
|
import org.apache.lucene.codecs.sep.SepPostingsReader;
|
||||||
import org.apache.lucene.codecs.sep.SepPostingsWriter;
|
import org.apache.lucene.codecs.sep.SepPostingsWriter;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pass ForFactory to a PostingsWriter/ReaderBase, and get
|
* Pass ForFactory to a PostingsWriter/ReaderBase, and get
|
||||||
* customized postings format plugged.
|
* customized postings format plugged.
|
||||||
*/
|
*/
|
||||||
public final class ForPostingsFormat extends PostingsFormat {
|
public final class ForPostingsFormat extends PostingsFormat {
|
||||||
private final int blockSize;
|
|
||||||
private final int minBlockSize;
|
private final int minBlockSize;
|
||||||
private final int maxBlockSize;
|
private final int maxBlockSize;
|
||||||
public final static int DEFAULT_BLOCK_SIZE = 128;
|
public final static int DEFAULT_BLOCK_SIZE = 128;
|
||||||
|
|
||||||
public ForPostingsFormat() {
|
public ForPostingsFormat() {
|
||||||
super("For");
|
super("For");
|
||||||
this.blockSize = DEFAULT_BLOCK_SIZE;
|
|
||||||
this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
|
this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
|
||||||
this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
|
this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ForPostingsFormat(int minBlockSize, int maxBlockSize) {
|
public ForPostingsFormat(int minBlockSize, int maxBlockSize) {
|
||||||
super("For");
|
super("For");
|
||||||
this.blockSize = DEFAULT_BLOCK_SIZE;
|
|
||||||
this.minBlockSize = minBlockSize;
|
this.minBlockSize = minBlockSize;
|
||||||
assert minBlockSize > 1;
|
assert minBlockSize > 1;
|
||||||
this.maxBlockSize = maxBlockSize;
|
this.maxBlockSize = maxBlockSize;
|
||||||
|
@ -67,7 +65,7 @@ public final class ForPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return getName() + "(blocksize=" + blockSize + ")";
|
return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -25,7 +25,6 @@ import java.util.Arrays;
|
||||||
* which is determined by the max value in this block.
|
* which is determined by the max value in this block.
|
||||||
*/
|
*/
|
||||||
public class ForUtil {
|
public class ForUtil {
|
||||||
public static final int HEADER_INT_SIZE=1;
|
|
||||||
protected static final int[] MASK = { 0x00000000,
|
protected static final int[] MASK = { 0x00000000,
|
||||||
0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
|
0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
|
||||||
0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
|
0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
|
||||||
|
@ -39,16 +38,30 @@ public class ForUtil {
|
||||||
* @param data uncompressed data
|
* @param data uncompressed data
|
||||||
* @param size num of ints to compress
|
* @param size num of ints to compress
|
||||||
* @param intBuffer integer buffer to hold compressed data
|
* @param intBuffer integer buffer to hold compressed data
|
||||||
|
* @return encoded block byte size
|
||||||
*/
|
*/
|
||||||
public static int compress(final int[] data, int size, IntBuffer intBuffer) {
|
public static int compress(final int[] data, IntBuffer intBuffer) {
|
||||||
int numBits=getNumBits(data,size);
|
int numBits=getNumBits(data);
|
||||||
|
if (numBits == 0) {
|
||||||
|
return compressDuplicateBlock(data,intBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
int size=data.length;
|
||||||
|
int encodedSize = (size*numBits+31)/32;
|
||||||
|
|
||||||
for (int i=0; i<size; ++i) {
|
for (int i=0; i<size; ++i) {
|
||||||
encodeNormalValue(intBuffer,i,data[i], numBits);
|
encodeNormalValue(intBuffer,i,data[i], numBits);
|
||||||
}
|
}
|
||||||
encodeHeader(intBuffer, size, numBits);
|
|
||||||
|
|
||||||
return (HEADER_INT_SIZE+(size*numBits+31)/32)*4;
|
return getHeader(encodedSize, numBits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save only one int when the whole block equals to 1
|
||||||
|
*/
|
||||||
|
static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
|
||||||
|
intBuffer.put(0,data[0]);
|
||||||
|
return getHeader(1, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decompress given Integer buffer into int array.
|
/** Decompress given Integer buffer into int array.
|
||||||
|
@ -56,18 +69,13 @@ public class ForUtil {
|
||||||
* @param intBuffer integer buffer to hold compressed data
|
* @param intBuffer integer buffer to hold compressed data
|
||||||
* @param data int array to hold uncompressed data
|
* @param data int array to hold uncompressed data
|
||||||
*/
|
*/
|
||||||
public static int decompress(IntBuffer intBuffer, int[] data) {
|
public static void decompress(IntBuffer intBuffer, int[] data, int header) {
|
||||||
|
|
||||||
// since this buffer is reused at upper level, rewind first
|
// since this buffer is reused at upper level, rewind first
|
||||||
intBuffer.rewind();
|
intBuffer.rewind();
|
||||||
|
|
||||||
int header = intBuffer.get();
|
|
||||||
int numInts = (header & MASK[8]) + 1;
|
|
||||||
int numBits = ((header >> 8) & MASK[6]);
|
int numBits = ((header >> 8) & MASK[6]);
|
||||||
|
|
||||||
decompressCore(intBuffer, data, numBits);
|
decompressCore(intBuffer, data, numBits);
|
||||||
|
|
||||||
return numInts;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -117,15 +125,10 @@ public class ForUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) {
|
|
||||||
int header = getHeader(numInts,numBits);
|
|
||||||
intBuffer.put(0, header);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void encodeNormalValue(IntBuffer intBuffer, int pos, int value, int numBits) {
|
static void encodeNormalValue(IntBuffer intBuffer, int pos, int value, int numBits) {
|
||||||
final int globalBitPos = numBits*pos; // position in bit stream
|
final int globalBitPos = numBits*pos; // position in bit stream
|
||||||
final int localBitPos = globalBitPos & 31; // position inside an int
|
final int localBitPos = globalBitPos & 31; // position inside an int
|
||||||
int intPos = HEADER_INT_SIZE + globalBitPos/32; // which integer to locate
|
int intPos = globalBitPos/32; // which integer to locate
|
||||||
setBufferIntBits(intBuffer, intPos, localBitPos, numBits, value);
|
setBufferIntBits(intBuffer, intPos, localBitPos, numBits, value);
|
||||||
if ((localBitPos + numBits) > 32) { // value does not fit in this int, fill tail
|
if ((localBitPos + numBits) > 32) { // value does not fit in this int, fill tail
|
||||||
setBufferIntBits(intBuffer, intPos+1, 0,
|
setBufferIntBits(intBuffer, intPos+1, 0,
|
||||||
|
@ -145,8 +148,12 @@ public class ForUtil {
|
||||||
/**
|
/**
|
||||||
* Estimate best num of frame bits according to the largest value.
|
* Estimate best num of frame bits according to the largest value.
|
||||||
*/
|
*/
|
||||||
static int getNumBits(final int[] data, int size) {
|
static int getNumBits(final int[] data) {
|
||||||
int optBits=0;
|
if (isAllEqual(data)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int size=data.length;
|
||||||
|
int optBits=1;
|
||||||
for (int i=0; i<size; ++i) {
|
for (int i=0; i<size; ++i) {
|
||||||
while ((data[i] & ~MASK[optBits]) != 0) {
|
while ((data[i] & ~MASK[optBits]) != 0) {
|
||||||
optBits++;
|
optBits++;
|
||||||
|
@ -155,16 +162,37 @@ public class ForUtil {
|
||||||
return optBits;
|
return optBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static boolean isAllEqual(final int[] data) {
|
||||||
|
int len = data.length;
|
||||||
|
int v = data[0];
|
||||||
|
for (int i=1; i<len; i++) {
|
||||||
|
if (data[i] != v) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate the 4 byte header, which contains (from lsb to msb):
|
* Generate the 4 byte header, which contains (from lsb to msb):
|
||||||
*
|
*
|
||||||
* - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
|
* 8 bits for encoded block int size (excluded header, this limits DEFAULT_BLOCK_SIZE <= 2^8)
|
||||||
* - 6 bits for num of frame bits
|
* 6 bits for num of frame bits (when 0, values in this block are all the same)
|
||||||
* - other bits unused
|
* other bits unused
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static int getHeader(int numInts, int numBits) {
|
static int getHeader(int encodedSize, int numBits) {
|
||||||
return (numInts-1)
|
return (encodedSize)
|
||||||
| ((numBits) << 8);
|
| ((numBits) << 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: get metadata from header.
|
||||||
|
*/
|
||||||
|
public static int getEncodedSize(int header) {
|
||||||
|
return ((header & MASK[8]))*4;
|
||||||
|
}
|
||||||
|
public static int getNumBits(int header) {
|
||||||
|
return ((header >> 8) & MASK[6]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,11 +39,7 @@ import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
|
||||||
|
|
||||||
public final class PForFactory extends IntStreamFactory {
|
public final class PForFactory extends IntStreamFactory {
|
||||||
|
|
||||||
/* number of ints for each block */
|
|
||||||
private final int blockSize;
|
|
||||||
|
|
||||||
public PForFactory() {
|
public PForFactory() {
|
||||||
this.blockSize=PForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -51,7 +47,7 @@ public final class PForFactory extends IntStreamFactory {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
IndexOutput out = dir.createOutput(fileName, context);
|
IndexOutput out = dir.createOutput(fileName, context);
|
||||||
try {
|
try {
|
||||||
IntIndexOutput ret = new PForIndexOutput(out, blockSize);
|
IntIndexOutput ret = new PForIndexOutput(out);
|
||||||
success = true;
|
success = true;
|
||||||
return ret;
|
return ret;
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -85,11 +81,10 @@ public final class PForFactory extends IntStreamFactory {
|
||||||
private final IntBuffer encodedBuffer;
|
private final IntBuffer encodedBuffer;
|
||||||
|
|
||||||
PForBlockReader(final IndexInput in, final int[] buffer) {
|
PForBlockReader(final IndexInput in, final int[] buffer) {
|
||||||
// upperbound for encoded value should include:
|
// upperbound for encoded value should include(here header is not buffered):
|
||||||
// 1. blockSize of normal value (4x bytes);
|
// 1. blockSize of normal value (4x bytes);
|
||||||
// 2. blockSize of exception value (4x bytes);
|
// 2. blockSize of exception value (4x bytes);
|
||||||
// 3. header (4bytes);
|
this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
|
||||||
this.encoded = new byte[blockSize*8+4];
|
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.buffer = buffer;
|
this.buffer = buffer;
|
||||||
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
|
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
|
||||||
|
@ -98,10 +93,11 @@ public final class PForFactory extends IntStreamFactory {
|
||||||
// TODO: implement public void skipBlock() {} ?
|
// TODO: implement public void skipBlock() {} ?
|
||||||
@Override
|
@Override
|
||||||
public void readBlock() throws IOException {
|
public void readBlock() throws IOException {
|
||||||
final int numBytes = in.readInt();
|
final int header = in.readInt();
|
||||||
assert numBytes <= blockSize*8+4;
|
final int numBytes = PForUtil.getEncodedSize(header);
|
||||||
|
assert numBytes <= PForPostingsFormat.DEFAULT_BLOCK_SIZE*8;
|
||||||
in.readBytes(encoded,0,numBytes);
|
in.readBytes(encoded,0,numBytes);
|
||||||
PForUtil.decompress(encodedBuffer,buffer);
|
PForUtil.decompress(encodedBuffer,buffer,header);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,16 +111,17 @@ public final class PForFactory extends IntStreamFactory {
|
||||||
private final byte[] encoded;
|
private final byte[] encoded;
|
||||||
private final IntBuffer encodedBuffer;
|
private final IntBuffer encodedBuffer;
|
||||||
|
|
||||||
PForIndexOutput(IndexOutput out, int blockSize) throws IOException {
|
PForIndexOutput(IndexOutput out) throws IOException {
|
||||||
super(out,blockSize);
|
super(out, PForPostingsFormat.DEFAULT_BLOCK_SIZE);
|
||||||
this.encoded = new byte[blockSize*8+4];
|
this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
|
||||||
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
|
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void flushBlock() throws IOException {
|
protected void flushBlock() throws IOException {
|
||||||
final int numBytes = PForUtil.compress(buffer,buffer.length,encodedBuffer);
|
final int header = PForUtil.compress(buffer,encodedBuffer);
|
||||||
out.writeInt(numBytes);
|
final int numBytes = PForUtil.getEncodedSize(header);
|
||||||
|
out.writeInt(header);
|
||||||
out.writeBytes(encoded, numBytes);
|
out.writeBytes(encoded, numBytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,20 +45,17 @@ import org.apache.lucene.util.IOUtils;
|
||||||
* customized postings format plugged.
|
* customized postings format plugged.
|
||||||
*/
|
*/
|
||||||
public final class PForPostingsFormat extends PostingsFormat {
|
public final class PForPostingsFormat extends PostingsFormat {
|
||||||
private final int blockSize;
|
|
||||||
private final int minBlockSize;
|
private final int minBlockSize;
|
||||||
private final int maxBlockSize;
|
private final int maxBlockSize;
|
||||||
public final static int DEFAULT_BLOCK_SIZE = 128;
|
public final static int DEFAULT_BLOCK_SIZE = 128;
|
||||||
|
|
||||||
public PForPostingsFormat() {
|
public PForPostingsFormat() {
|
||||||
super("PFor");
|
super("PFor");
|
||||||
this.blockSize = DEFAULT_BLOCK_SIZE;
|
|
||||||
this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
|
this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
|
||||||
this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
|
this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
|
||||||
}
|
}
|
||||||
public PForPostingsFormat(int minBlockSize, int maxBlockSize) {
|
public PForPostingsFormat(int minBlockSize, int maxBlockSize) {
|
||||||
super("PFor");
|
super("PFor");
|
||||||
this.blockSize = DEFAULT_BLOCK_SIZE;
|
|
||||||
this.minBlockSize = minBlockSize;
|
this.minBlockSize = minBlockSize;
|
||||||
assert minBlockSize > 1;
|
assert minBlockSize > 1;
|
||||||
this.maxBlockSize = maxBlockSize;
|
this.maxBlockSize = maxBlockSize;
|
||||||
|
@ -67,7 +64,7 @@ public final class PForPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return getName() + "(blocksize=" + blockSize + ")";
|
return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE+ ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -32,13 +32,17 @@ public final class PForUtil extends ForUtil {
|
||||||
/** Compress given int[] into Integer buffer, with PFor format
|
/** Compress given int[] into Integer buffer, with PFor format
|
||||||
*
|
*
|
||||||
* @param data uncompressed data
|
* @param data uncompressed data
|
||||||
* @param size num of ints to compress
|
|
||||||
* @param intBuffer integer buffer to hold compressed data
|
* @param intBuffer integer buffer to hold compressed data
|
||||||
|
* @return block header
|
||||||
*/
|
*/
|
||||||
public static int compress(final int[] data, int size, IntBuffer intBuffer) {
|
public static int compress(final int[] data, IntBuffer intBuffer) {
|
||||||
/** estimate minimum compress size to determine numFrameBits */
|
/** estimate minimum compress size to determine numFrameBits */
|
||||||
int numBits=getNumBits(data,size);
|
int numBits=getNumBits(data);
|
||||||
|
if (numBits == 0) {
|
||||||
|
return compressDuplicateBlock(data,intBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = data.length;
|
||||||
int[] excValues = new int[size];
|
int[] excValues = new int[size];
|
||||||
int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1;
|
int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1;
|
||||||
|
|
||||||
|
@ -115,13 +119,13 @@ public final class PForUtil extends ForUtil {
|
||||||
excBytes=4;
|
excBytes=4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
excByteOffset = HEADER_INT_SIZE*4 + (size*numBits + 7)/8;
|
excByteOffset = (size*numBits + 7)/8;
|
||||||
encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
|
encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
|
||||||
|
|
||||||
/** encode header */
|
/** encode header */
|
||||||
encodeHeader(intBuffer, size, numBits, excNum, excFirstPos, excBytes);
|
int encodedSize = (excByteOffset + excBytes*excNum + 3)/4;
|
||||||
|
|
||||||
return (excByteOffset + excBytes*excNum + 3)/4*4;
|
return getHeader(encodedSize, numBits, excNum, excFirstPos, excBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decompress given Integer buffer into int array.
|
/** Decompress given Integer buffer into int array.
|
||||||
|
@ -129,13 +133,10 @@ public final class PForUtil extends ForUtil {
|
||||||
* @param intBuffer integer buffer to hold compressed data
|
* @param intBuffer integer buffer to hold compressed data
|
||||||
* @param data int array to hold uncompressed data
|
* @param data int array to hold uncompressed data
|
||||||
*/
|
*/
|
||||||
public static int decompress(IntBuffer intBuffer, int[] data) {
|
public static void decompress(IntBuffer intBuffer, int[] data, int header) {
|
||||||
|
|
||||||
// since this buffer is reused at upper level, rewind first
|
// since this buffer is reused at upper level, rewind first
|
||||||
intBuffer.rewind();
|
intBuffer.rewind();
|
||||||
|
|
||||||
int header = intBuffer.get();
|
|
||||||
int numInts = (header & MASK[8]);
|
|
||||||
int excNum = ((header >> 8) & MASK[8]) + 1;
|
int excNum = ((header >> 8) & MASK[8]) + 1;
|
||||||
int excFirstPos = ((header >> 16) & MASK[8]) - 1;
|
int excFirstPos = ((header >> 16) & MASK[8]) - 1;
|
||||||
int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
|
int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
|
||||||
|
@ -144,13 +145,6 @@ public final class PForUtil extends ForUtil {
|
||||||
decompressCore(intBuffer, data, numBits);
|
decompressCore(intBuffer, data, numBits);
|
||||||
|
|
||||||
patchException(intBuffer,data,excNum,excFirstPos,excBytes);
|
patchException(intBuffer,data,excNum,excFirstPos,excBytes);
|
||||||
|
|
||||||
return numInts;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
|
|
||||||
int header = getHeader(numInts,numBits,excNum,excFirstPos,excBytes);
|
|
||||||
intBuffer.put(0, header);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -189,6 +183,14 @@ public final class PForUtil extends ForUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save only header when the whole block equals to 1
|
||||||
|
*/
|
||||||
|
static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
|
||||||
|
intBuffer.put(0,data[0]);
|
||||||
|
return getHeader(1, 0, 0, -1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode exception values base on the exception pointers in normal area,
|
* Decode exception values base on the exception pointers in normal area,
|
||||||
* and values in exception area.
|
* and values in exception area.
|
||||||
|
@ -200,7 +202,6 @@ public final class PForUtil extends ForUtil {
|
||||||
* In this case we should preprocess patch several heading exceptions,
|
* In this case we should preprocess patch several heading exceptions,
|
||||||
* before calling this method.
|
* before calling this method.
|
||||||
*
|
*
|
||||||
* TODO: blockSize is hardewired to size==128 only
|
|
||||||
*/
|
*/
|
||||||
public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
|
public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
|
||||||
if (excFirstPos == -1) {
|
if (excFirstPos == -1) {
|
||||||
|
@ -251,13 +252,14 @@ public final class PForUtil extends ForUtil {
|
||||||
* Estimate best number of frame bits according to minimum compressed size.
|
* Estimate best number of frame bits according to minimum compressed size.
|
||||||
* It will run 32 times.
|
* It will run 32 times.
|
||||||
*/
|
*/
|
||||||
static int getNumBits(final int[] data, int size) {
|
static int getNumBits(final int[] data) {
|
||||||
if (isAllZero(data))
|
if (isAllEqual(data)) {
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
int optBits=1;
|
int optBits=1;
|
||||||
int optSize=estimateCompressedSize(data,size,optBits);
|
int optSize=estimateCompressedSize(data,optBits);
|
||||||
for (int i=2; i<=32; ++i) {
|
for (int i=2; i<=32; ++i) {
|
||||||
int curSize=estimateCompressedSize(data,size,i);
|
int curSize=estimateCompressedSize(data,i);
|
||||||
if (curSize<optSize) {
|
if (curSize<optSize) {
|
||||||
optSize=curSize;
|
optSize=curSize;
|
||||||
optBits=i;
|
optBits=i;
|
||||||
|
@ -266,22 +268,13 @@ public final class PForUtil extends ForUtil {
|
||||||
return optBits;
|
return optBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean isAllZero(final int[] data) {
|
|
||||||
int len=data.length;
|
|
||||||
for (int i=0; i<len; i++) {
|
|
||||||
if (data[i] != 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Iterate the whole block to get maximum exception bits,
|
* Iterate the whole block to get maximum exception bits,
|
||||||
* and estimate compressed size without forced exception.
|
* and estimate compressed size without forced exception.
|
||||||
* TODO: foresee forced exception for better estimation
|
* TODO: foresee forced exception for better estimation
|
||||||
*/
|
*/
|
||||||
static int estimateCompressedSize(final int[] data, int size, int numBits) {
|
static int estimateCompressedSize(final int[] data, int numBits) {
|
||||||
|
int size=data.length;
|
||||||
int totalBytes=(numBits*size+7)/8; // always round to byte
|
int totalBytes=(numBits*size+7)/8; // always round to byte
|
||||||
int excNum=0;
|
int excNum=0;
|
||||||
int curExcBytes=1;
|
int curExcBytes=1;
|
||||||
|
@ -304,29 +297,24 @@ public final class PForUtil extends ForUtil {
|
||||||
}
|
}
|
||||||
totalBytes+=excNum*curExcBytes;
|
totalBytes+=excNum*curExcBytes;
|
||||||
|
|
||||||
return totalBytes/4*4+HEADER_INT_SIZE; // round up to ints
|
return totalBytes/4*4; // round up to ints
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate the 4 byte header, which contains (from lsb to msb):
|
* Generate the 4 byte header which contains (from lsb to msb):
|
||||||
*
|
*
|
||||||
* 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
|
* 8 bits for encoded block int size (excluding header, this limits DEFAULT_BLOCK_SIZE <= 2^(8-1))
|
||||||
*
|
*
|
||||||
* 8 bits for exception num - 1 (when no exceptions, this is undefined)
|
* 8 bits for exception num - 1 (when no exceptions, this is undefined)
|
||||||
*
|
*
|
||||||
* 8 bits for the index of the first exception + 1 (when no exception, this is 0)
|
* 8 bits for the index of the first exception + 1 (when no exception, this is 0)
|
||||||
*
|
*
|
||||||
* 6 bits for num of frame bits
|
* 6 bits for num of frame bits (when 0, values in this block are all the same)
|
||||||
* 2 bits for the exception code: 00: byte, 01: short, 10: int
|
* 2 bits for the exception code: 00: byte, 01: short, 10: int
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
// TODO: exception num should never be equal with uncompressed int num!!!
|
static int getHeader(int encodedSize, int numBits, int excNum, int excFirstPos, int excBytes) {
|
||||||
// first exception ranges from -1 ~ 255
|
return (encodedSize)
|
||||||
// the problem is that we don't need first exception to be -1 ...
|
|
||||||
// it is ok to range from 0~255, and judge exception for exception num (0~255)
|
|
||||||
// uncompressed int num: (1~256)
|
|
||||||
static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
|
|
||||||
return (numInts-1)
|
|
||||||
| (((excNum-1) & MASK[8]) << 8)
|
| (((excNum-1) & MASK[8]) << 8)
|
||||||
| ((excFirstPos+1) << 16)
|
| ((excFirstPos+1) << 16)
|
||||||
| ((numBits) << 24)
|
| ((numBits) << 24)
|
||||||
|
@ -337,8 +325,8 @@ public final class PForUtil extends ForUtil {
|
||||||
/**
|
/**
|
||||||
* Expert: get metadata from header.
|
* Expert: get metadata from header.
|
||||||
*/
|
*/
|
||||||
public static int getNumInts(int header) {
|
public static int getEncodedSize(int header) {
|
||||||
return (header & MASK[8]) + 1;
|
return ((header & MASK[8]))*4;
|
||||||
}
|
}
|
||||||
public static int getExcNum(int header) {
|
public static int getExcNum(int header) {
|
||||||
return ((header >> 8) & MASK[8]) + 1;
|
return ((header >> 8) & MASK[8]) + 1;
|
||||||
|
|
|
@ -28,7 +28,7 @@ final class PackedIntsDecompress {
|
||||||
// NOTE: hardwired to blockSize == 128
|
// NOTE: hardwired to blockSize == 128
|
||||||
|
|
||||||
public static void decode0(final IntBuffer compressedBuffer, final int[] output) {
|
public static void decode0(final IntBuffer compressedBuffer, final int[] output) {
|
||||||
Arrays.fill(output, 0);
|
Arrays.fill(output, compressedBuffer.get());
|
||||||
}
|
}
|
||||||
public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
|
public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
|
||||||
final int numFrameBits = 1;
|
final int numFrameBits = 1;
|
||||||
|
|
|
@ -78,7 +78,7 @@ def genDecompress():
|
||||||
w('\n // NOTE: hardwired to blockSize == 128\n\n')
|
w('\n // NOTE: hardwired to blockSize == 128\n\n')
|
||||||
|
|
||||||
w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n')
|
w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n')
|
||||||
w(' Arrays.fill(output, 0);\n')
|
w(' Arrays.fill(output, compressedBuffer.get());\n')
|
||||||
w(' }\n')
|
w(' }\n')
|
||||||
|
|
||||||
for numFrameBits in xrange(1, 33):
|
for numFrameBits in xrange(1, 33):
|
||||||
|
|
|
@ -46,39 +46,33 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should not encode extra information other than header
|
* Should not encode extra information other than single int
|
||||||
*/
|
*/
|
||||||
public void testPForAllZeros() throws Exception {
|
public void testAllEqual() throws Exception {
|
||||||
|
initRandom();
|
||||||
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
int ensz;
|
|
||||||
int[] data=new int[sz];
|
int[] data=new int[sz];
|
||||||
byte[] res = new byte[4+sz*8];
|
byte[] res = new byte[sz*8];
|
||||||
int[] copy = new int[sz];
|
int[] copy = new int[sz];
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
|
||||||
Arrays.fill(data,0);
|
|
||||||
ensz = ForUtil.compress(data,sz,resBuffer); // test For
|
|
||||||
ForUtil.decompress(resBuffer,copy);
|
|
||||||
assert ensz == 4;
|
|
||||||
assert cmp(data,sz,copy,sz)==true;
|
|
||||||
|
|
||||||
Arrays.fill(data,0);
|
|
||||||
ensz = PForUtil.compress(data,sz,resBuffer); // test PFor
|
|
||||||
PForUtil.decompress(resBuffer,copy);
|
|
||||||
assert ensz == 4;
|
|
||||||
assert cmp(data,sz,copy,sz)==true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testForAllZeros() throws Exception {
|
|
||||||
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
|
||||||
int ensz;
|
int ensz;
|
||||||
int[] data=new int[sz];
|
int header;
|
||||||
byte[] res = new byte[4+sz*8];
|
|
||||||
int[] copy = new int[sz];
|
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
|
||||||
|
|
||||||
ensz = ForUtil.compress(data,sz,resBuffer);
|
Arrays.fill(data,gen.nextInt());
|
||||||
|
header = ForUtil.compress(data,resBuffer); // test For
|
||||||
|
ensz = ForUtil.getEncodedSize(header);
|
||||||
|
assert ensz == 4;
|
||||||
|
|
||||||
|
ForUtil.decompress(resBuffer,copy,header);
|
||||||
|
assert cmp(data,sz,copy,sz)==true;
|
||||||
|
|
||||||
|
Arrays.fill(data,gen.nextInt());
|
||||||
|
header = PForUtil.compress(data,resBuffer); // test PFor
|
||||||
|
ensz = PForUtil.getEncodedSize(header);
|
||||||
|
assert ensz == 4;
|
||||||
|
|
||||||
|
PForUtil.decompress(resBuffer,copy,header);
|
||||||
|
assert cmp(data,sz,copy,sz)==true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -89,7 +83,7 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
initRandom();
|
initRandom();
|
||||||
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
int[] data=new int[sz];
|
int[] data=new int[sz];
|
||||||
byte[] res = new byte[4+sz*8];
|
byte[] res = new byte[sz*8];
|
||||||
int[] copy = new int[sz];
|
int[] copy = new int[sz];
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
int numBits = gen.nextInt(5)+1;
|
int numBits = gen.nextInt(5)+1;
|
||||||
|
@ -107,8 +101,8 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
ensz = PForUtil.compress(data,sz,resBuffer);
|
header = PForUtil.compress(data,resBuffer);
|
||||||
header = resBuffer.get(0);
|
ensz = PForUtil.getEncodedSize(header);
|
||||||
expect = j;
|
expect = j;
|
||||||
got = PForUtil.getExcNum(header);
|
got = PForUtil.getExcNum(header);
|
||||||
assert expect == got: expect+" expected but got "+got;
|
assert expect == got: expect+" expected but got "+got;
|
||||||
|
@ -122,8 +116,8 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
ensz = PForUtil.compress(data,sz,resBuffer);
|
header = PForUtil.compress(data,resBuffer);
|
||||||
header = resBuffer.get(0);
|
ensz = PForUtil.getEncodedSize(header);
|
||||||
expect = 2*(j-1)+1;
|
expect = 2*(j-1)+1;
|
||||||
got = PForUtil.getExcNum(header);
|
got = PForUtil.getExcNum(header);
|
||||||
assert expect == got: expect+" expected but got "+got;
|
assert expect == got: expect+" expected but got "+got;
|
||||||
|
@ -137,8 +131,8 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
ensz = PForUtil.compress(data,sz,resBuffer);
|
header = PForUtil.compress(data,resBuffer);
|
||||||
header = resBuffer.get(0);
|
ensz = PForUtil.getEncodedSize(header);
|
||||||
expect = 3*(j-1)+1;
|
expect = 3*(j-1)+1;
|
||||||
got = PForUtil.getExcNum(header);
|
got = PForUtil.getExcNum(header);
|
||||||
assert expect == got: expect+" expected but got "+got;
|
assert expect == got: expect+" expected but got "+got;
|
||||||
|
@ -156,7 +150,7 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
Integer[] buff= new Integer[sz];
|
Integer[] buff= new Integer[sz];
|
||||||
int[] data = new int[sz];
|
int[] data = new int[sz];
|
||||||
int[] copy = new int[sz];
|
int[] copy = new int[sz];
|
||||||
byte[] res = new byte[4+sz*8];
|
byte[] res = new byte[sz*8];
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
|
||||||
int excIndex = gen.nextInt(sz/2);
|
int excIndex = gen.nextInt(sz/2);
|
||||||
|
@ -176,13 +170,13 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
for (int i=0; i<sz; ++i)
|
for (int i=0; i<sz; ++i)
|
||||||
data[i] = buff[i];
|
data[i] = buff[i];
|
||||||
|
|
||||||
int ensz = PForUtil.compress(data,sz,resBuffer);
|
int header = PForUtil.compress(data,resBuffer);
|
||||||
|
int ensz = PForUtil.getEncodedSize(header);
|
||||||
|
|
||||||
assert (ensz <= sz*8+4): ensz+" > "+sz*8+4; // must not exceed the loose upperbound
|
assert (ensz <= sz*8): ensz+" > "+sz*8; // must not exceed the loose upperbound
|
||||||
assert (ensz >= 8); // at least we have a header along with an exception, right?
|
assert (ensz >= 4); // at least we have an exception, right?
|
||||||
|
|
||||||
resBuffer.rewind();
|
PForUtil.decompress(resBuffer,copy,header);
|
||||||
PForUtil.decompress(resBuffer,copy);
|
|
||||||
|
|
||||||
// println(getHex(data,sz)+"\n");
|
// println(getHex(data,sz)+"\n");
|
||||||
// println(getHex(res,ensz)+"\n");
|
// println(getHex(res,ensz)+"\n");
|
||||||
|
@ -211,7 +205,7 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
int[] data = new int[sz];
|
int[] data = new int[sz];
|
||||||
for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
|
for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
|
||||||
double alpha=gen.nextDouble(); // rate of normal value
|
double alpha=gen.nextDouble(); // rate of normal value
|
||||||
for (int j=0; j<=32; ++j) {
|
for (int j=i; j<=32; ++j) {
|
||||||
createDistribution(data,sz,alpha,MASK[i],MASK[j]);
|
createDistribution(data,sz,alpha,MASK[i],MASK[j]);
|
||||||
tryCompressAndDecompress(data, sz);
|
tryCompressAndDecompress(data, sz);
|
||||||
}
|
}
|
||||||
|
@ -229,15 +223,16 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
data[i] = buff[i];
|
data[i] = buff[i];
|
||||||
}
|
}
|
||||||
public void tryCompressAndDecompress(final int[] data, int sz) throws Exception {
|
public void tryCompressAndDecompress(final int[] data, int sz) throws Exception {
|
||||||
byte[] res = new byte[4+sz*8]; // loosely upperbound
|
byte[] res = new byte[sz*8]; // loosely upperbound
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
|
||||||
int ensz = PForUtil.compress(data,sz,resBuffer);
|
int header = PForUtil.compress(data,resBuffer);
|
||||||
|
int ensz = PForUtil.getEncodedSize(header);
|
||||||
|
|
||||||
assert (ensz <= sz*8+4); // must not exceed the loose upperbound
|
assert (ensz <= sz*8); // must not exceed the loose upperbound
|
||||||
|
|
||||||
int[] copy = new int[sz];
|
int[] copy = new int[sz];
|
||||||
PForUtil.decompress(resBuffer,copy);
|
PForUtil.decompress(resBuffer,copy,header);
|
||||||
|
|
||||||
// println(getHex(data,sz)+"\n");
|
// println(getHex(data,sz)+"\n");
|
||||||
// println(getHex(res,ensz)+"\n");
|
// println(getHex(res,ensz)+"\n");
|
||||||
|
|
Loading…
Reference in New Issue