mirror of https://github.com/apache/lucene.git
LUCENE-3892: fix maxChain bug; support numBits=0 (all ints == 0)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1360993 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
101ea65be9
commit
2c0f1e2a4f
|
@ -17,28 +17,27 @@ package org.apache.lucene.codecs.pfor;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.codecs.FieldsConsumer;
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||||
|
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||||
|
import org.apache.lucene.codecs.TermsIndexReaderBase;
|
||||||
|
import org.apache.lucene.codecs.TermsIndexWriterBase;
|
||||||
import org.apache.lucene.codecs.FixedGapTermsIndexReader;
|
import org.apache.lucene.codecs.FixedGapTermsIndexReader;
|
||||||
import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
|
import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
|
||||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||||
import org.apache.lucene.codecs.TermsIndexReaderBase;
|
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||||
import org.apache.lucene.codecs.TermsIndexWriterBase;
|
|
||||||
import org.apache.lucene.codecs.sep.SepPostingsReader;
|
import org.apache.lucene.codecs.sep.SepPostingsReader;
|
||||||
import org.apache.lucene.codecs.sep.SepPostingsWriter;
|
import org.apache.lucene.codecs.sep.SepPostingsWriter;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pass ForFactory to a PostingsWriter/ReaderBase, and get
|
* Pass ForFactory to a PostingsWriter/ReaderBase, and get
|
||||||
|
|
|
@ -63,7 +63,7 @@ public class ForUtil {
|
||||||
|
|
||||||
int header = intBuffer.get();
|
int header = intBuffer.get();
|
||||||
int numInts = (header & MASK[8]) + 1;
|
int numInts = (header & MASK[8]) + 1;
|
||||||
int numBits = ((header >> 8) & MASK[5]) + 1;
|
int numBits = ((header >> 8) & MASK[6]);
|
||||||
|
|
||||||
decompressCore(intBuffer, data, numBits);
|
decompressCore(intBuffer, data, numBits);
|
||||||
|
|
||||||
|
@ -77,10 +77,11 @@ public class ForUtil {
|
||||||
*/
|
*/
|
||||||
static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
|
static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
|
||||||
assert numBits<=32;
|
assert numBits<=32;
|
||||||
assert numBits>=1;
|
assert numBits>=0;
|
||||||
|
|
||||||
// TODO: PackedIntsDecompress is hardewired to size==129 only
|
// TODO: PackedIntsDecompress is hardewired to size==128 only
|
||||||
switch(numBits) {
|
switch(numBits) {
|
||||||
|
case 0: PackedIntsDecompress.decode0(intBuffer, data); break;
|
||||||
case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
|
case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
|
||||||
case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
|
case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
|
||||||
case 3: PackedIntsDecompress.decode3(intBuffer, data); break;
|
case 3: PackedIntsDecompress.decode3(intBuffer, data); break;
|
||||||
|
@ -145,7 +146,7 @@ public class ForUtil {
|
||||||
* Estimate best num of frame bits according to the largest value.
|
* Estimate best num of frame bits according to the largest value.
|
||||||
*/
|
*/
|
||||||
static int getNumBits(final int[] data, int size) {
|
static int getNumBits(final int[] data, int size) {
|
||||||
int optBits=1;
|
int optBits=0;
|
||||||
for (int i=0; i<size; ++i) {
|
for (int i=0; i<size; ++i) {
|
||||||
while ((data[i] & ~MASK[optBits]) != 0) {
|
while ((data[i] & ~MASK[optBits]) != 0) {
|
||||||
optBits++;
|
optBits++;
|
||||||
|
@ -158,12 +159,12 @@ public class ForUtil {
|
||||||
* Generate the 4 byte header, which contains (from lsb to msb):
|
* Generate the 4 byte header, which contains (from lsb to msb):
|
||||||
*
|
*
|
||||||
* - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
|
* - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
|
||||||
* - 5 bits for num of frame bits - 1
|
* - 6 bits for num of frame bits
|
||||||
* - other bits unused
|
* - other bits unused
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static int getHeader(int numInts, int numBits) {
|
static int getHeader(int numInts, int numBits) {
|
||||||
return (numInts-1)
|
return (numInts-1)
|
||||||
| ((numBits-1) << 8);
|
| ((numBits) << 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,9 @@ public final class PForUtil extends ForUtil {
|
||||||
|
|
||||||
// the max value possible for current exception pointer,
|
// the max value possible for current exception pointer,
|
||||||
// value of the first pointer is limited by header as 254
|
// value of the first pointer is limited by header as 254
|
||||||
long maxChain = (1<<8) - 2;
|
// (first exception ranges from -1 ~ 254)
|
||||||
|
long maxChainFirst = 254;
|
||||||
|
long maxChain = maxChainFirst + 1;
|
||||||
|
|
||||||
boolean conValue, conForce, conEnd;
|
boolean conValue, conForce, conEnd;
|
||||||
int i=0;
|
int i=0;
|
||||||
|
@ -77,41 +79,35 @@ public final class PForUtil extends ForUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** encode normal area, record exception positions */
|
/** encode normal area, record exception positions */
|
||||||
i=0;
|
|
||||||
excNum = 0;
|
excNum = 0;
|
||||||
if (excFirstPos < 0) { // no exception
|
if (excFirstPos < 0) { // no exception
|
||||||
for (; i<size; ++i) {
|
for (i=0; i<size; ++i) {
|
||||||
encodeNormalValue(intBuffer,i,data[i], numBits);
|
encodeNormalValue(intBuffer,i,data[i], numBits);
|
||||||
}
|
}
|
||||||
excLastPos = -1;
|
excLastPos = -1;
|
||||||
} else {
|
} else {
|
||||||
for (; i<excFirstPos; ++i) {
|
for (i=0; i<excFirstPos; ++i) {
|
||||||
encodeNormalValue(intBuffer,i,data[i], numBits);
|
encodeNormalValue(intBuffer,i,data[i], numBits);
|
||||||
}
|
}
|
||||||
maxChain = 1L<<numBits;
|
maxChain = 1L<<numBits;
|
||||||
excLastPos = -1;
|
excLastPos = excFirstPos;
|
||||||
for (; i<size; ++i) {
|
excNum = i<size? 1:0;
|
||||||
|
for (i=excFirstPos+1; i<size; ++i) {
|
||||||
conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
|
conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
|
||||||
conForce = (i >= maxChain + excLastPos); // force exception
|
conForce = (i >= maxChain + excLastPos); // force exception
|
||||||
conEnd = (excNum == excNumBase); // following forced ignored
|
conEnd = (excNum == excNumBase); // following forced ignored
|
||||||
if ((!conValue && !conForce) || conEnd) {
|
if ((!conValue && !conForce) || conEnd) {
|
||||||
encodeNormalValue(intBuffer,i,data[i], numBits);
|
encodeNormalValue(intBuffer,i,data[i], numBits);
|
||||||
} else {
|
} else {
|
||||||
if (excLastPos >= 0) {
|
|
||||||
encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits);
|
encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits);
|
||||||
}
|
|
||||||
excNum++;
|
excNum++;
|
||||||
excLastPos = i;
|
excLastPos = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (excLastPos >= 0) {
|
|
||||||
encodeNormalValue(intBuffer, excLastPos, (i-excLastPos-1)&MASK[numBits], numBits); // mask out suppressed force exception
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** encode exception area */
|
/** encode exception area */
|
||||||
i=0;
|
for (i=0; i<excNum; ++i) {
|
||||||
for (; i<excNum; ++i) {
|
|
||||||
if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
|
if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
|
||||||
excBytes=2;
|
excBytes=2;
|
||||||
}
|
}
|
||||||
|
@ -139,11 +135,11 @@ public final class PForUtil extends ForUtil {
|
||||||
intBuffer.rewind();
|
intBuffer.rewind();
|
||||||
|
|
||||||
int header = intBuffer.get();
|
int header = intBuffer.get();
|
||||||
int numInts = (header & MASK[8]) + 1;
|
int numInts = (header & MASK[8]);
|
||||||
int excNum = ((header >> 8) & MASK[8]) + 1;
|
int excNum = ((header >> 8) & MASK[8]) + 1;
|
||||||
int excFirstPos = ((header >> 16) & MASK[8]) - 1;
|
int excFirstPos = ((header >> 16) & MASK[8]) - 1;
|
||||||
int excBytes = PER_EXCEPTION_SIZE[(header >> 29) & MASK[2]];
|
int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
|
||||||
int numBits = ((header >> 24) & MASK[5]) + 1;
|
int numBits = ((header >> 24) & MASK[6]);
|
||||||
|
|
||||||
decompressCore(intBuffer, data, numBits);
|
decompressCore(intBuffer, data, numBits);
|
||||||
|
|
||||||
|
@ -157,6 +153,11 @@ public final class PForUtil extends ForUtil {
|
||||||
intBuffer.put(0, header);
|
intBuffer.put(0, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode exception values into exception area.
|
||||||
|
* The width for each exception will be fixed as:
|
||||||
|
* 1, 2, or 4 byte(s).
|
||||||
|
*/
|
||||||
static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) {
|
static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) {
|
||||||
if (num == 0)
|
if (num == 0)
|
||||||
return;
|
return;
|
||||||
|
@ -251,8 +252,10 @@ public final class PForUtil extends ForUtil {
|
||||||
* It will run 32 times.
|
* It will run 32 times.
|
||||||
*/
|
*/
|
||||||
static int getNumBits(final int[] data, int size) {
|
static int getNumBits(final int[] data, int size) {
|
||||||
|
if (isAllZero(data))
|
||||||
|
return 0;
|
||||||
int optBits=1;
|
int optBits=1;
|
||||||
int optSize=estimateCompressedSize(data,size,1);
|
int optSize=estimateCompressedSize(data,size,optBits);
|
||||||
for (int i=2; i<=32; ++i) {
|
for (int i=2; i<=32; ++i) {
|
||||||
int curSize=estimateCompressedSize(data,size,i);
|
int curSize=estimateCompressedSize(data,size,i);
|
||||||
if (curSize<optSize) {
|
if (curSize<optSize) {
|
||||||
|
@ -263,6 +266,16 @@ public final class PForUtil extends ForUtil {
|
||||||
return optBits;
|
return optBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static boolean isAllZero(final int[] data) {
|
||||||
|
int len=data.length;
|
||||||
|
for (int i=0; i<len; i++) {
|
||||||
|
if (data[i] != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Iterate the whole block to get maximum exception bits,
|
* Iterate the whole block to get maximum exception bits,
|
||||||
* and estimate compressed size without forced exception.
|
* and estimate compressed size without forced exception.
|
||||||
|
@ -303,16 +316,40 @@ public final class PForUtil extends ForUtil {
|
||||||
*
|
*
|
||||||
* 8 bits for the index of the first exception + 1 (when no exception, this is 0)
|
* 8 bits for the index of the first exception + 1 (when no exception, this is 0)
|
||||||
*
|
*
|
||||||
* 5 bits for num of frame bits - 1
|
* 6 bits for num of frame bits
|
||||||
* 2 bits for the exception code: 00: byte, 01: short, 10: int
|
* 2 bits for the exception code: 00: byte, 01: short, 10: int
|
||||||
* 1 bit unused
|
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
// TODO: exception num should never be equal with uncompressed int num!!!
|
||||||
|
// first exception ranges from -1 ~ 255
|
||||||
|
// the problem is that we don't need first exception to be -1 ...
|
||||||
|
// it is ok to range from 0~255, and judge exception for exception num (0~255)
|
||||||
|
// uncompressed int num: (1~256)
|
||||||
static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
|
static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
|
||||||
return (numInts-1)
|
return (numInts-1)
|
||||||
| (((excNum-1) & MASK[8]) << 8)
|
| (((excNum-1) & MASK[8]) << 8)
|
||||||
| ((excFirstPos+1) << 16)
|
| ((excFirstPos+1) << 16)
|
||||||
| ((numBits-1) << 24)
|
| ((numBits) << 24)
|
||||||
| ((excBytes/2) << 29);
|
| ((excBytes/2) << 30);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: get metadata from header.
|
||||||
|
*/
|
||||||
|
public static int getNumInts(int header) {
|
||||||
|
return (header & MASK[8]) + 1;
|
||||||
|
}
|
||||||
|
public static int getExcNum(int header) {
|
||||||
|
return ((header >> 8) & MASK[8]) + 1;
|
||||||
|
}
|
||||||
|
public static int getFirstPos(int header) {
|
||||||
|
return ((header >> 16) & MASK[8]) - 1;
|
||||||
|
}
|
||||||
|
public static int getExcBytes(int header) {
|
||||||
|
return PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
|
||||||
|
}
|
||||||
|
public static int getNumBits(int header) {
|
||||||
|
return ((header >> 24) & MASK[6]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.pfor;
|
||||||
/* This code is generated, do not modify. See gendecompress.py */
|
/* This code is generated, do not modify. See gendecompress.py */
|
||||||
|
|
||||||
import java.nio.IntBuffer;
|
import java.nio.IntBuffer;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
final class PackedIntsDecompress {
|
final class PackedIntsDecompress {
|
||||||
|
|
||||||
|
@ -26,6 +27,9 @@ final class PackedIntsDecompress {
|
||||||
|
|
||||||
// NOTE: hardwired to blockSize == 128
|
// NOTE: hardwired to blockSize == 128
|
||||||
|
|
||||||
|
public static void decode0(final IntBuffer compressedBuffer, final int[] output) {
|
||||||
|
Arrays.fill(output, 0);
|
||||||
|
}
|
||||||
public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
|
public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
|
||||||
final int numFrameBits = 1;
|
final int numFrameBits = 1;
|
||||||
final int mask = (int) ((1L<<numFrameBits) - 1);
|
final int mask = (int) ((1L<<numFrameBits) - 1);
|
||||||
|
|
|
@ -69,13 +69,18 @@ def genDecompress():
|
||||||
|
|
||||||
w("\n/* This code is generated, do not modify. See gendecompress.py */\n\n")
|
w("\n/* This code is generated, do not modify. See gendecompress.py */\n\n")
|
||||||
|
|
||||||
w("import java.nio.IntBuffer;\n\n")
|
w("import java.nio.IntBuffer;\n")
|
||||||
|
w("import java.util.Arrays;\n\n")
|
||||||
|
|
||||||
w("final class PackedIntsDecompress {\n")
|
w("final class PackedIntsDecompress {\n")
|
||||||
|
|
||||||
w('\n // nocommit: assess perf of this to see if specializing is really needed\n')
|
w('\n // nocommit: assess perf of this to see if specializing is really needed\n')
|
||||||
w('\n // NOTE: hardwired to blockSize == 128\n\n')
|
w('\n // NOTE: hardwired to blockSize == 128\n\n')
|
||||||
|
|
||||||
|
w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n')
|
||||||
|
w(' Arrays.fill(output, 0);\n')
|
||||||
|
w(' }\n')
|
||||||
|
|
||||||
for numFrameBits in xrange(1, 33):
|
for numFrameBits in xrange(1, 33):
|
||||||
w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
|
w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
|
||||||
w(' final int numFrameBits = %d;\n' % numFrameBits)
|
w(' final int numFrameBits = %d;\n' % numFrameBits)
|
||||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the core utility for PFor compress and decompress
|
* Test the core utility for PFor compress and decompress
|
||||||
* We don't provide test case for For encoder/decoder, since
|
* We don't specially provide test case for For encoder/decoder, since
|
||||||
* PFor is a extended version of For, and most methods will be reused
|
* PFor is a extended version of For, and most methods will be reused
|
||||||
* here.
|
* here.
|
||||||
*/
|
*/
|
||||||
|
@ -46,29 +46,140 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test correctness of ignored forced exception.
|
* Should not encode extra information other than header
|
||||||
* The trailing forced exceptions shouldn't be reverted
|
|
||||||
* since they're not necessary.
|
|
||||||
*/
|
*/
|
||||||
public void testForcedException() throws Exception {
|
public void testPForAllZeros() throws Exception {
|
||||||
|
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
|
int ensz;
|
||||||
|
int[] data=new int[sz];
|
||||||
|
byte[] res = new byte[4+sz*8];
|
||||||
|
int[] copy = new int[sz];
|
||||||
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
|
||||||
|
Arrays.fill(data,0);
|
||||||
|
ensz = ForUtil.compress(data,sz,resBuffer); // test For
|
||||||
|
ForUtil.decompress(resBuffer,copy);
|
||||||
|
assert ensz == 4;
|
||||||
|
assert cmp(data,sz,copy,sz)==true;
|
||||||
|
|
||||||
|
Arrays.fill(data,0);
|
||||||
|
ensz = PForUtil.compress(data,sz,resBuffer); // test PFor
|
||||||
|
PForUtil.decompress(resBuffer,copy);
|
||||||
|
assert ensz == 4;
|
||||||
|
assert cmp(data,sz,copy,sz)==true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testForAllZeros() throws Exception {
|
||||||
|
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
|
int ensz;
|
||||||
|
int[] data=new int[sz];
|
||||||
|
byte[] res = new byte[4+sz*8];
|
||||||
|
int[] copy = new int[sz];
|
||||||
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
|
||||||
|
ensz = ForUtil.compress(data,sz,resBuffer);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test correctness of forced exception.
|
||||||
|
* the forced ones should exactly fit max chain
|
||||||
|
*/
|
||||||
|
public void testForcedExceptionDistance() throws Exception {
|
||||||
initRandom();
|
initRandom();
|
||||||
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
|
int[] data=new int[sz];
|
||||||
|
byte[] res = new byte[4+sz*8];
|
||||||
|
int[] copy = new int[sz];
|
||||||
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
int numBits = gen.nextInt(5)+1;
|
||||||
|
|
||||||
|
int i,j;
|
||||||
|
int pace, ensz, header;
|
||||||
|
int expect, got;
|
||||||
|
|
||||||
|
// fill exception value with same pace, there should
|
||||||
|
// be no forced exceptions.
|
||||||
|
createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]);
|
||||||
|
pace = 1<<numBits;
|
||||||
|
for (i=0,j=0; i<sz; i+=pace) {
|
||||||
|
int exc = gen.nextInt();
|
||||||
|
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
ensz = PForUtil.compress(data,sz,resBuffer);
|
||||||
|
header = resBuffer.get(0);
|
||||||
|
expect = j;
|
||||||
|
got = PForUtil.getExcNum(header);
|
||||||
|
assert expect == got: expect+" expected but got "+got;
|
||||||
|
|
||||||
|
// there should exactly one forced exception before each
|
||||||
|
// exception when i>0
|
||||||
|
createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]);
|
||||||
|
pace = (1<<numBits)+1;
|
||||||
|
for (i=0,j=0; i<sz; i+=pace) {
|
||||||
|
int exc = gen.nextInt();
|
||||||
|
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
ensz = PForUtil.compress(data,sz,resBuffer);
|
||||||
|
header = resBuffer.get(0);
|
||||||
|
expect = 2*(j-1)+1;
|
||||||
|
got = PForUtil.getExcNum(header);
|
||||||
|
assert expect == got: expect+" expected but got "+got;
|
||||||
|
|
||||||
|
|
||||||
|
// two forced exception
|
||||||
|
createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]);
|
||||||
|
pace = (1<<numBits)*2+1;
|
||||||
|
for (i=0,j=0; i<sz; i+=pace) {
|
||||||
|
int exc = gen.nextInt();
|
||||||
|
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
ensz = PForUtil.compress(data,sz,resBuffer);
|
||||||
|
header = resBuffer.get(0);
|
||||||
|
expect = 3*(j-1)+1;
|
||||||
|
got = PForUtil.getExcNum(header);
|
||||||
|
assert expect == got: expect+" expected but got "+got;
|
||||||
|
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Test correctness of ignored forced exception.
|
||||||
|
* The trailing forced exceptions should always be reverted
|
||||||
|
* since they're not necessary.
|
||||||
|
*/
|
||||||
|
public void testTrailingForcedException() throws Exception {
|
||||||
|
initRandom();
|
||||||
|
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
|
assert sz % 32 == 0;
|
||||||
Integer[] buff= new Integer[sz];
|
Integer[] buff= new Integer[sz];
|
||||||
int[] data = new int[sz];
|
int[] data = new int[sz];
|
||||||
int[] copy = new int[sz];
|
int[] copy = new int[sz];
|
||||||
byte[] res = new byte[4+sz*8];
|
byte[] res = new byte[4+sz*8];
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
for (int i=0; i<sz-1; ++i)
|
|
||||||
buff[i]=gen.nextInt() & 1;
|
|
||||||
buff[sz-1]=gen.nextInt() & 0xffffffff; // create only one exception
|
|
||||||
|
|
||||||
Collections.shuffle(Arrays.asList(buff),gen);
|
int excIndex = gen.nextInt(sz/2);
|
||||||
|
int excValue = gen.nextInt();
|
||||||
|
if ((excValue & 0xffff0000) == 0) {
|
||||||
|
excValue |= 0xffff0000; // always prepare a 4 bytes exception
|
||||||
|
}
|
||||||
|
|
||||||
|
// make value of numFrameBits to be small,
|
||||||
|
// thus easy to get forced exceptions
|
||||||
|
for (int i=0; i<sz; ++i) {
|
||||||
|
buff[i]=gen.nextInt() & 1;
|
||||||
|
}
|
||||||
|
// create only one value exception
|
||||||
|
buff[excIndex]=excValue;
|
||||||
|
|
||||||
for (int i=0; i<sz; ++i)
|
for (int i=0; i<sz; ++i)
|
||||||
data[i] = buff[i];
|
data[i] = buff[i];
|
||||||
|
|
||||||
int ensz = PForUtil.compress(data,sz,resBuffer);
|
int ensz = PForUtil.compress(data,sz,resBuffer);
|
||||||
|
|
||||||
assert (ensz <= sz*8+4); // must not exceed the loose upperbound
|
assert (ensz <= sz*8+4): ensz+" > "+sz*8+4; // must not exceed the loose upperbound
|
||||||
|
assert (ensz >= 8); // at least we have a header along with an exception, right?
|
||||||
|
|
||||||
resBuffer.rewind();
|
resBuffer.rewind();
|
||||||
PForUtil.decompress(resBuffer,copy);
|
PForUtil.decompress(resBuffer,copy);
|
||||||
|
@ -77,6 +188,15 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
// println(getHex(res,ensz)+"\n");
|
// println(getHex(res,ensz)+"\n");
|
||||||
// println(getHex(copy,sz)+"\n");
|
// println(getHex(copy,sz)+"\n");
|
||||||
|
|
||||||
|
// fetch the last int, i.e. last exception.
|
||||||
|
int lastExc = (res[ensz-4] << 24) |
|
||||||
|
((0xff & res[ensz-3]) << 16) |
|
||||||
|
((0xff & res[ensz-2]) << 8 ) |
|
||||||
|
(0xff & res[ensz-1]);
|
||||||
|
|
||||||
|
// trailing forced exceptions are suppressed,
|
||||||
|
// so the last exception should be what we assigned.
|
||||||
|
assert lastExc==excValue;
|
||||||
assert cmp(data,sz,copy,sz)==true;
|
assert cmp(data,sz,copy,sz)==true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,18 +207,18 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
*/
|
*/
|
||||||
public void testAllDistribution() throws Exception {
|
public void testAllDistribution() throws Exception {
|
||||||
initRandom();
|
initRandom();
|
||||||
|
int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
|
||||||
|
int[] data = new int[sz];
|
||||||
for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
|
for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
|
||||||
double alpha=gen.nextDouble(); // rate of normal value
|
double alpha=gen.nextDouble(); // rate of normal value
|
||||||
for (int j=0; j<=32; ++j) {
|
for (int j=0; j<=32; ++j) {
|
||||||
tryDistribution(ForPostingsFormat.DEFAULT_BLOCK_SIZE,alpha,MASK[i],MASK[j]);
|
createDistribution(data,sz,alpha,MASK[i],MASK[j]);
|
||||||
|
tryCompressAndDecompress(data, sz);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public void tryDistribution(int sz, double alpha, int masknorm, int maskexc) throws Exception {
|
public void createDistribution(int[] data, int sz, double alpha, int masknorm, int maskexc) {
|
||||||
Integer[] buff= new Integer[sz];
|
Integer[] buff= new Integer[sz];
|
||||||
int[] data = new int[sz];
|
|
||||||
byte[] res = new byte[4+sz*8]; // loosely upperbound
|
|
||||||
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
|
||||||
int i=0;
|
int i=0;
|
||||||
for (; i<sz*alpha; ++i)
|
for (; i<sz*alpha; ++i)
|
||||||
buff[i]=gen.nextInt() & masknorm;
|
buff[i]=gen.nextInt() & masknorm;
|
||||||
|
@ -107,6 +227,10 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
Collections.shuffle(Arrays.asList(buff),gen);
|
Collections.shuffle(Arrays.asList(buff),gen);
|
||||||
for (i=0; i<sz; ++i)
|
for (i=0; i<sz; ++i)
|
||||||
data[i] = buff[i];
|
data[i] = buff[i];
|
||||||
|
}
|
||||||
|
public void tryCompressAndDecompress(final int[] data, int sz) throws Exception {
|
||||||
|
byte[] res = new byte[4+sz*8]; // loosely upperbound
|
||||||
|
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
|
||||||
|
|
||||||
int ensz = PForUtil.compress(data,sz,resBuffer);
|
int ensz = PForUtil.compress(data,sz,resBuffer);
|
||||||
|
|
||||||
|
@ -160,6 +284,9 @@ public class TestPForUtil extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
return hex.toString();
|
return hex.toString();
|
||||||
}
|
}
|
||||||
|
static void eprintln(String format, Object... args) {
|
||||||
|
System.err.println(String.format(format,args));
|
||||||
|
}
|
||||||
static void println(String format, Object... args) {
|
static void println(String format, Object... args) {
|
||||||
System.out.println(String.format(format,args));
|
System.out.println(String.format(format,args));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue