From 9251168dfdcc38b5e59680d62ac86eae7cab81df Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 16 Jul 2012 12:55:26 +0000 Subject: [PATCH] LUCENE-3892: get numBytes & bit width into single int header git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1362013 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/codecs/pfor/ForFactory.java | 31 ++++--- .../lucene/codecs/pfor/ForPostingsFormat.java | 28 +++---- .../apache/lucene/codecs/pfor/ForUtil.java | 78 ++++++++++++------ .../lucene/codecs/pfor/PForFactory.java | 29 +++---- .../codecs/pfor/PForPostingsFormat.java | 5 +- .../apache/lucene/codecs/pfor/PForUtil.java | 82 ++++++++----------- .../codecs/pfor/PackedIntsDecompress.java | 2 +- .../lucene/codecs/pfor/gendecompress.py | 2 +- .../lucene/codecs/pfor/TestPForUtil.java | 81 +++++++++--------- 9 files changed, 169 insertions(+), 169 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java index 2ac4e10019b..9dff2c2f60f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java @@ -39,11 +39,7 @@ import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; public final class ForFactory extends IntStreamFactory { - /* number of ints for each block */ - private final int blockSize; - public ForFactory() { - this.blockSize = ForPostingsFormat.DEFAULT_BLOCK_SIZE; } @Override @@ -51,7 +47,7 @@ public final class ForFactory extends IntStreamFactory { boolean success = false; IndexOutput out = dir.createOutput(fileName, context); try { - IntIndexOutput ret = new ForIndexOutput(out, blockSize); + IntIndexOutput ret = new ForIndexOutput(out); success = true; return ret; } finally { @@ -85,10 +81,9 @@ public final class ForFactory extends IntStreamFactory { private final IntBuffer encodedBuffer; ForBlockReader(final IndexInput in, final int[] buffer) { - // upperbound for encoded value should include: - // 1. blockSize of normal value when numFrameBits=32(4x bytes); - // 2. header (4bytes); - this.encoded = new byte[blockSize*4+4]; + // upperbound for encoded value should include(here header is not buffered): + // blockSize of normal value when numFrameBits=32(4x bytes); + this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4]; this.in = in; this.buffer = buffer; this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); @@ -97,10 +92,11 @@ public final class ForFactory extends IntStreamFactory { // TODO: implement public void skipBlock() {} ? @Override public void readBlock() throws IOException { - final int numBytes = in.readInt(); - assert numBytes <= blockSize*4+4; + final int header = in.readInt(); + final int numBytes = ForUtil.getEncodedSize(header); + assert numBytes <= ForPostingsFormat.DEFAULT_BLOCK_SIZE*4; in.readBytes(encoded,0,numBytes); - ForUtil.decompress(encodedBuffer,buffer); + ForUtil.decompress(encodedBuffer,buffer,header); } } @@ -114,16 +110,17 @@ public final class ForFactory extends IntStreamFactory { private final byte[] encoded; private final IntBuffer encodedBuffer; - ForIndexOutput(IndexOutput out, int blockSize) throws IOException { - super(out,blockSize); - this.encoded = new byte[blockSize*4+4]; + ForIndexOutput(IndexOutput out) throws IOException { + super(out,ForPostingsFormat.DEFAULT_BLOCK_SIZE); + this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4]; this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer(); } @Override protected void flushBlock() throws IOException { - final int numBytes = ForUtil.compress(buffer,buffer.length,encodedBuffer); - out.writeInt(numBytes); + final int header = ForUtil.compress(buffer,encodedBuffer); + final int numBytes = ForUtil.getEncodedSize(header); + out.writeInt(header); out.writeBytes(encoded, numBytes); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java index 77be04b37c4..b1fb043bfbe 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java @@ -17,48 +17,46 @@ package org.apache.lucene.codecs.pfor; * limitations under the License. */ -import java.util.Set; import java.io.IOException; +import java.util.Set; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.codecs.BlockTreeTermsReader; +import org.apache.lucene.codecs.BlockTreeTermsWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.BlockTreeTermsWriter; -import org.apache.lucene.codecs.BlockTreeTermsReader; -import org.apache.lucene.codecs.TermsIndexReaderBase; -import org.apache.lucene.codecs.TermsIndexWriterBase; import org.apache.lucene.codecs.FixedGapTermsIndexReader; import org.apache.lucene.codecs.FixedGapTermsIndexWriter; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.TermsIndexReaderBase; +import org.apache.lucene.codecs.TermsIndexWriterBase; import org.apache.lucene.codecs.sep.SepPostingsReader; import org.apache.lucene.codecs.sep.SepPostingsWriter; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; /** * Pass ForFactory to a PostingsWriter/ReaderBase, and get * customized postings format plugged. */ public final class ForPostingsFormat extends PostingsFormat { - private final int blockSize; private final int minBlockSize; private final int maxBlockSize; public final static int DEFAULT_BLOCK_SIZE = 128; public ForPostingsFormat() { super("For"); - this.blockSize = DEFAULT_BLOCK_SIZE; this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE; this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE; } public ForPostingsFormat(int minBlockSize, int maxBlockSize) { super("For"); - this.blockSize = DEFAULT_BLOCK_SIZE; this.minBlockSize = minBlockSize; assert minBlockSize > 1; this.maxBlockSize = maxBlockSize; @@ -67,7 +65,7 @@ public final class ForPostingsFormat extends PostingsFormat { @Override public String toString() { - return getName() + "(blocksize=" + blockSize + ")"; + return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")"; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java index 684e9e0fb4a..0f26bc66f1a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java @@ -25,7 +25,6 @@ import java.util.Arrays; * which is determined by the max value in this block. */ public class ForUtil { - public static final int HEADER_INT_SIZE=1; protected static final int[] MASK = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, @@ -39,16 +38,30 @@ public class ForUtil { * @param data uncompressed data * @param size num of ints to compress * @param intBuffer integer buffer to hold compressed data + * @return encoded block byte size */ - public static int compress(final int[] data, int size, IntBuffer intBuffer) { - int numBits=getNumBits(data,size); - + public static int compress(final int[] data, IntBuffer intBuffer) { + int numBits=getNumBits(data); + if (numBits == 0) { + return compressDuplicateBlock(data,intBuffer); + } + + int size=data.length; + int encodedSize = (size*numBits+31)/32; + for (int i=0; i> 8) & MASK[6]); decompressCore(intBuffer, data, numBits); - - return numInts; } /** @@ -117,15 +125,10 @@ public class ForUtil { } } - static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) { - int header = getHeader(numInts,numBits); - intBuffer.put(0, header); - } - static void encodeNormalValue(IntBuffer intBuffer, int pos, int value, int numBits) { final int globalBitPos = numBits*pos; // position in bit stream final int localBitPos = globalBitPos & 31; // position inside an int - int intPos = HEADER_INT_SIZE + globalBitPos/32; // which integer to locate + int intPos = globalBitPos/32; // which integer to locate setBufferIntBits(intBuffer, intPos, localBitPos, numBits, value); if ((localBitPos + numBits) > 32) { // value does not fit in this int, fill tail setBufferIntBits(intBuffer, intPos+1, 0, @@ -145,8 +148,12 @@ public class ForUtil { /** * Estimate best num of frame bits according to the largest value. */ - static int getNumBits(final int[] data, int size) { - int optBits=0; + static int getNumBits(final int[] data) { + if (isAllEqual(data)) { + return 0; + } + int size=data.length; + int optBits=1; for (int i=0; i> 8) & MASK[6]); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java index af5508b52c1..dc110b86ec1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java @@ -39,11 +39,7 @@ import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; public final class PForFactory extends IntStreamFactory { - /* number of ints for each block */ - private final int blockSize; - public PForFactory() { - this.blockSize=PForPostingsFormat.DEFAULT_BLOCK_SIZE; } @Override @@ -51,7 +47,7 @@ public final class PForFactory extends IntStreamFactory { boolean success = false; IndexOutput out = dir.createOutput(fileName, context); try { - IntIndexOutput ret = new PForIndexOutput(out, blockSize); + IntIndexOutput ret = new PForIndexOutput(out); success = true; return ret; } finally { @@ -85,11 +81,10 @@ public final class PForFactory extends IntStreamFactory { private final IntBuffer encodedBuffer; PForBlockReader(final IndexInput in, final int[] buffer) { - // upperbound for encoded value should include: + // upperbound for encoded value should include(here header is not buffered): // 1. blockSize of normal value (4x bytes); // 2. blockSize of exception value (4x bytes); - // 3. header (4bytes); - this.encoded = new byte[blockSize*8+4]; + this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8]; this.in = in; this.buffer = buffer; this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); @@ -98,10 +93,11 @@ public final class PForFactory extends IntStreamFactory { // TODO: implement public void skipBlock() {} ? @Override public void readBlock() throws IOException { - final int numBytes = in.readInt(); - assert numBytes <= blockSize*8+4; + final int header = in.readInt(); + final int numBytes = PForUtil.getEncodedSize(header); + assert numBytes <= PForPostingsFormat.DEFAULT_BLOCK_SIZE*8; in.readBytes(encoded,0,numBytes); - PForUtil.decompress(encodedBuffer,buffer); + PForUtil.decompress(encodedBuffer,buffer,header); } } @@ -115,16 +111,17 @@ public final class PForFactory extends IntStreamFactory { private final byte[] encoded; private final IntBuffer encodedBuffer; - PForIndexOutput(IndexOutput out, int blockSize) throws IOException { - super(out,blockSize); - this.encoded = new byte[blockSize*8+4]; + PForIndexOutput(IndexOutput out) throws IOException { + super(out, PForPostingsFormat.DEFAULT_BLOCK_SIZE); + this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8]; this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer(); } @Override protected void flushBlock() throws IOException { - final int numBytes = PForUtil.compress(buffer,buffer.length,encodedBuffer); - out.writeInt(numBytes); + final int header = PForUtil.compress(buffer,encodedBuffer); + final int numBytes = PForUtil.getEncodedSize(header); + out.writeInt(header); out.writeBytes(encoded, numBytes); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java index b888ad6ebc4..8704dcdd4cf 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java @@ -45,20 +45,17 @@ import org.apache.lucene.util.IOUtils; * customized postings format plugged. */ public final class PForPostingsFormat extends PostingsFormat { - private final int blockSize; private final int minBlockSize; private final int maxBlockSize; public final static int DEFAULT_BLOCK_SIZE = 128; public PForPostingsFormat() { super("PFor"); - this.blockSize = DEFAULT_BLOCK_SIZE; this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE; this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE; } public PForPostingsFormat(int minBlockSize, int maxBlockSize) { super("PFor"); - this.blockSize = DEFAULT_BLOCK_SIZE; this.minBlockSize = minBlockSize; assert minBlockSize > 1; this.maxBlockSize = maxBlockSize; @@ -67,7 +64,7 @@ public final class PForPostingsFormat extends PostingsFormat { @Override public String toString() { - return getName() + "(blocksize=" + blockSize + ")"; + return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE+ ")"; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java index 31ff062ffa3..92baa3e2317 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java @@ -32,13 +32,17 @@ public final class PForUtil extends ForUtil { /** Compress given int[] into Integer buffer, with PFor format * * @param data uncompressed data - * @param size num of ints to compress * @param intBuffer integer buffer to hold compressed data + * @return block header */ - public static int compress(final int[] data, int size, IntBuffer intBuffer) { + public static int compress(final int[] data, IntBuffer intBuffer) { /** estimate minimum compress size to determine numFrameBits */ - int numBits=getNumBits(data,size); - + int numBits=getNumBits(data); + if (numBits == 0) { + return compressDuplicateBlock(data,intBuffer); + } + + int size = data.length; int[] excValues = new int[size]; int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1; @@ -115,13 +119,13 @@ public final class PForUtil extends ForUtil { excBytes=4; } } - excByteOffset = HEADER_INT_SIZE*4 + (size*numBits + 7)/8; + excByteOffset = (size*numBits + 7)/8; encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset); /** encode header */ - encodeHeader(intBuffer, size, numBits, excNum, excFirstPos, excBytes); + int encodedSize = (excByteOffset + excBytes*excNum + 3)/4; - return (excByteOffset + excBytes*excNum + 3)/4*4; + return getHeader(encodedSize, numBits, excNum, excFirstPos, excBytes); } /** Decompress given Integer buffer into int array. @@ -129,13 +133,10 @@ public final class PForUtil extends ForUtil { * @param intBuffer integer buffer to hold compressed data * @param data int array to hold uncompressed data */ - public static int decompress(IntBuffer intBuffer, int[] data) { - + public static void decompress(IntBuffer intBuffer, int[] data, int header) { // since this buffer is reused at upper level, rewind first intBuffer.rewind(); - int header = intBuffer.get(); - int numInts = (header & MASK[8]); int excNum = ((header >> 8) & MASK[8]) + 1; int excFirstPos = ((header >> 16) & MASK[8]) - 1; int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]]; @@ -144,13 +145,6 @@ public final class PForUtil extends ForUtil { decompressCore(intBuffer, data, numBits); patchException(intBuffer,data,excNum,excFirstPos,excBytes); - - return numInts; - } - - static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits, int excNum, int excFirstPos, int excBytes) { - int header = getHeader(numInts,numBits,excNum,excFirstPos,excBytes); - intBuffer.put(0, header); } /** @@ -189,6 +183,14 @@ public final class PForUtil extends ForUtil { } } + /** + * Save only header when the whole block equals to 1 + */ + static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) { + intBuffer.put(0,data[0]); + return getHeader(1, 0, 0, -1, 0); + } + /** * Decode exception values base on the exception pointers in normal area, * and values in exception area. @@ -200,7 +202,6 @@ public final class PForUtil extends ForUtil { * In this case we should preprocess patch several heading exceptions, * before calling this method. * - * TODO: blockSize is hardewired to size==128 only */ public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) { if (excFirstPos == -1) { @@ -251,13 +252,14 @@ public final class PForUtil extends ForUtil { * Estimate best number of frame bits according to minimum compressed size. * It will run 32 times. */ - static int getNumBits(final int[] data, int size) { - if (isAllZero(data)) + static int getNumBits(final int[] data) { + if (isAllEqual(data)) { return 0; + } int optBits=1; - int optSize=estimateCompressedSize(data,size,optBits); + int optSize=estimateCompressedSize(data,optBits); for (int i=2; i<=32; ++i) { - int curSize=estimateCompressedSize(data,size,i); + int curSize=estimateCompressedSize(data,i); if (curSize> 8) & MASK[8]) + 1; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java index d5d0b4d6f05..6f41d65da42 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java @@ -28,7 +28,7 @@ final class PackedIntsDecompress { // NOTE: hardwired to blockSize == 128 public static void decode0(final IntBuffer compressedBuffer, final int[] output) { - Arrays.fill(output, 0); + Arrays.fill(output, compressedBuffer.get()); } public static void decode1(final IntBuffer compressedBuffer, final int[] output) { final int numFrameBits = 1; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py b/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py index 06daeca90d7..3ea9f6042ed 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py @@ -78,7 +78,7 @@ def genDecompress(): w('\n // NOTE: hardwired to blockSize == 128\n\n') w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n') - w(' Arrays.fill(output, 0);\n') + w(' Arrays.fill(output, compressedBuffer.get());\n') w(' }\n') for numFrameBits in xrange(1, 33): diff --git a/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java index 4157fc3f277..830952e2850 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java @@ -46,39 +46,33 @@ public class TestPForUtil extends LuceneTestCase { } /** - * Should not encode extra information other than header + * Should not encode extra information other than single int */ - public void testPForAllZeros() throws Exception { + public void testAllEqual() throws Exception { + initRandom(); int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE; - int ensz; int[] data=new int[sz]; - byte[] res = new byte[4+sz*8]; + byte[] res = new byte[sz*8]; int[] copy = new int[sz]; IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); - - Arrays.fill(data,0); - ensz = ForUtil.compress(data,sz,resBuffer); // test For - ForUtil.decompress(resBuffer,copy); - assert ensz == 4; - assert cmp(data,sz,copy,sz)==true; - - Arrays.fill(data,0); - ensz = PForUtil.compress(data,sz,resBuffer); // test PFor - PForUtil.decompress(resBuffer,copy); - assert ensz == 4; - assert cmp(data,sz,copy,sz)==true; - } - - public void testForAllZeros() throws Exception { - int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE; int ensz; - int[] data=new int[sz]; - byte[] res = new byte[4+sz*8]; - int[] copy = new int[sz]; - IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); + int header; - ensz = ForUtil.compress(data,sz,resBuffer); + Arrays.fill(data,gen.nextInt()); + header = ForUtil.compress(data,resBuffer); // test For + ensz = ForUtil.getEncodedSize(header); + assert ensz == 4; + ForUtil.decompress(resBuffer,copy,header); + assert cmp(data,sz,copy,sz)==true; + + Arrays.fill(data,gen.nextInt()); + header = PForUtil.compress(data,resBuffer); // test PFor + ensz = PForUtil.getEncodedSize(header); + assert ensz == 4; + + PForUtil.decompress(resBuffer,copy,header); + assert cmp(data,sz,copy,sz)==true; } /** @@ -89,7 +83,7 @@ public class TestPForUtil extends LuceneTestCase { initRandom(); int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE; int[] data=new int[sz]; - byte[] res = new byte[4+sz*8]; + byte[] res = new byte[sz*8]; int[] copy = new int[sz]; IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); int numBits = gen.nextInt(5)+1; @@ -107,8 +101,8 @@ public class TestPForUtil extends LuceneTestCase { data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc; j++; } - ensz = PForUtil.compress(data,sz,resBuffer); - header = resBuffer.get(0); + header = PForUtil.compress(data,resBuffer); + ensz = PForUtil.getEncodedSize(header); expect = j; got = PForUtil.getExcNum(header); assert expect == got: expect+" expected but got "+got; @@ -122,8 +116,8 @@ public class TestPForUtil extends LuceneTestCase { data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc; j++; } - ensz = PForUtil.compress(data,sz,resBuffer); - header = resBuffer.get(0); + header = PForUtil.compress(data,resBuffer); + ensz = PForUtil.getEncodedSize(header); expect = 2*(j-1)+1; got = PForUtil.getExcNum(header); assert expect == got: expect+" expected but got "+got; @@ -137,8 +131,8 @@ public class TestPForUtil extends LuceneTestCase { data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc; j++; } - ensz = PForUtil.compress(data,sz,resBuffer); - header = resBuffer.get(0); + header = PForUtil.compress(data,resBuffer); + ensz = PForUtil.getEncodedSize(header); expect = 3*(j-1)+1; got = PForUtil.getExcNum(header); assert expect == got: expect+" expected but got "+got; @@ -156,7 +150,7 @@ public class TestPForUtil extends LuceneTestCase { Integer[] buff= new Integer[sz]; int[] data = new int[sz]; int[] copy = new int[sz]; - byte[] res = new byte[4+sz*8]; + byte[] res = new byte[sz*8]; IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); int excIndex = gen.nextInt(sz/2); @@ -176,13 +170,13 @@ public class TestPForUtil extends LuceneTestCase { for (int i=0; i "+sz*8+4; // must not exceed the loose upperbound - assert (ensz >= 8); // at least we have a header along with an exception, right? + assert (ensz <= sz*8): ensz+" > "+sz*8; // must not exceed the loose upperbound + assert (ensz >= 4); // at least we have an exception, right? - resBuffer.rewind(); - PForUtil.decompress(resBuffer,copy); + PForUtil.decompress(resBuffer,copy,header); // println(getHex(data,sz)+"\n"); // println(getHex(res,ensz)+"\n"); @@ -211,7 +205,7 @@ public class TestPForUtil extends LuceneTestCase { int[] data = new int[sz]; for (int i=0; i<=32; ++i) { // try to test every kinds of distribution double alpha=gen.nextDouble(); // rate of normal value - for (int j=0; j<=32; ++j) { + for (int j=i; j<=32; ++j) { createDistribution(data,sz,alpha,MASK[i],MASK[j]); tryCompressAndDecompress(data, sz); } @@ -229,15 +223,16 @@ public class TestPForUtil extends LuceneTestCase { data[i] = buff[i]; } public void tryCompressAndDecompress(final int[] data, int sz) throws Exception { - byte[] res = new byte[4+sz*8]; // loosely upperbound + byte[] res = new byte[sz*8]; // loosely upperbound IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); - int ensz = PForUtil.compress(data,sz,resBuffer); + int header = PForUtil.compress(data,resBuffer); + int ensz = PForUtil.getEncodedSize(header); - assert (ensz <= sz*8+4); // must not exceed the loose upperbound + assert (ensz <= sz*8); // must not exceed the loose upperbound int[] copy = new int[sz]; - PForUtil.decompress(resBuffer,copy); + PForUtil.decompress(resBuffer,copy,header); // println(getHex(data,sz)+"\n"); // println(getHex(res,ensz)+"\n");