diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java index ed7c63b48e7..77be04b37c4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java @@ -17,28 +17,27 @@ package org.apache.lucene.codecs.pfor; * limitations under the License. */ -import java.io.IOException; import java.util.Set; +import java.io.IOException; -import org.apache.lucene.codecs.BlockTreeTermsReader; -import org.apache.lucene.codecs.BlockTreeTermsWriter; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.BlockTreeTermsWriter; +import org.apache.lucene.codecs.BlockTreeTermsReader; +import org.apache.lucene.codecs.TermsIndexReaderBase; +import org.apache.lucene.codecs.TermsIndexWriterBase; import org.apache.lucene.codecs.FixedGapTermsIndexReader; import org.apache.lucene.codecs.FixedGapTermsIndexWriter; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.TermsIndexReaderBase; -import org.apache.lucene.codecs.TermsIndexWriterBase; +import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.sep.SepPostingsReader; import org.apache.lucene.codecs.sep.SepPostingsWriter; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; /** * Pass ForFactory to a PostingsWriter/ReaderBase, and get diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java index 473f9d3030a..684e9e0fb4a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java @@ -63,7 +63,7 @@ public class ForUtil { int header = intBuffer.get(); int numInts = (header & MASK[8]) + 1; - int numBits = ((header >> 8) & MASK[5]) + 1; + int numBits = ((header >> 8) & MASK[6]); decompressCore(intBuffer, data, numBits); @@ -77,10 +77,11 @@ public class ForUtil { */ static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) { assert numBits<=32; - assert numBits>=1; + assert numBits>=0; - // TODO: PackedIntsDecompress is hardewired to size==129 only + // TODO: PackedIntsDecompress is hardewired to size==128 only switch(numBits) { + case 0: PackedIntsDecompress.decode0(intBuffer, data); break; case 1: PackedIntsDecompress.decode1(intBuffer, data); break; case 2: PackedIntsDecompress.decode2(intBuffer, data); break; case 3: PackedIntsDecompress.decode3(intBuffer, data); break; @@ -145,7 +146,7 @@ public class ForUtil { * Estimate best num of frame bits according to the largest value. */ static int getNumBits(final int[] data, int size) { - int optBits=1; + int optBits=0; for (int i=0; i= maxChain + excLastPos); // force exception conEnd = (excNum == excNumBase); // following forced ignored if ((!conValue && !conForce) || conEnd) { encodeNormalValue(intBuffer,i,data[i], numBits); } else { - if (excLastPos >= 0) { - encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits); - } + encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits); excNum++; excLastPos = i; } } - if (excLastPos >= 0) { - encodeNormalValue(intBuffer, excLastPos, (i-excLastPos-1)&MASK[numBits], numBits); // mask out suppressed force exception - } } /** encode exception area */ - i=0; - for (; i> 8) & MASK[8]) + 1; int excFirstPos = ((header >> 16) & MASK[8]) - 1; - int excBytes = PER_EXCEPTION_SIZE[(header >> 29) & MASK[2]]; - int numBits = ((header >> 24) & MASK[5]) + 1; + int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]]; + int numBits = ((header >> 24) & MASK[6]); decompressCore(intBuffer, data, numBits); @@ -157,6 +153,11 @@ public final class PForUtil extends ForUtil { intBuffer.put(0, header); } + /** + * Encode exception values into exception area. + * The width for each exception will be fixed as: + * 1, 2, or 4 byte(s). + */ static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) { if (num == 0) return; @@ -251,8 +252,10 @@ public final class PForUtil extends ForUtil { * It will run 32 times. */ static int getNumBits(final int[] data, int size) { + if (isAllZero(data)) + return 0; int optBits=1; - int optSize=estimateCompressedSize(data,size,1); + int optSize=estimateCompressedSize(data,size,optBits); for (int i=2; i<=32; ++i) { int curSize=estimateCompressedSize(data,size,i); if (curSize> 8) & MASK[8]) + 1; + } + public static int getFirstPos(int header) { + return ((header >> 16) & MASK[8]) - 1; + } + public static int getExcBytes(int header) { + return PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]]; + } + public static int getNumBits(int header) { + return ((header >> 24) & MASK[6]); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java index f2af67f4510..d5d0b4d6f05 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java @@ -19,6 +19,7 @@ package org.apache.lucene.codecs.pfor; /* This code is generated, do not modify. See gendecompress.py */ import java.nio.IntBuffer; +import java.util.Arrays; final class PackedIntsDecompress { @@ -26,6 +27,9 @@ final class PackedIntsDecompress { // NOTE: hardwired to blockSize == 128 + public static void decode0(final IntBuffer compressedBuffer, final int[] output) { + Arrays.fill(output, 0); + } public static void decode1(final IntBuffer compressedBuffer, final int[] output) { final int numFrameBits = 1; final int mask = (int) ((1L<0 + createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]); + pace = (1< "+sz*8+4; // must not exceed the loose upperbound + assert (ensz >= 8); // at least we have a header along with an exception, right? resBuffer.rewind(); PForUtil.decompress(resBuffer,copy); - //println(getHex(data,sz)+"\n"); - //println(getHex(res,ensz)+"\n"); - //println(getHex(copy,sz)+"\n"); - +// println(getHex(data,sz)+"\n"); +// println(getHex(res,ensz)+"\n"); +// println(getHex(copy,sz)+"\n"); + + // fetch the last int, i.e. last exception. + int lastExc = (res[ensz-4] << 24) | + ((0xff & res[ensz-3]) << 16) | + ((0xff & res[ensz-2]) << 8 ) | + (0xff & res[ensz-1]); + + // trailing forced exceptions are suppressed, + // so the last exception should be what we assigned. + assert lastExc==excValue; assert cmp(data,sz,copy,sz)==true; } @@ -87,18 +207,18 @@ public class TestPForUtil extends LuceneTestCase { */ public void testAllDistribution() throws Exception { initRandom(); + int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE; + int[] data = new int[sz]; for (int i=0; i<=32; ++i) { // try to test every kinds of distribution double alpha=gen.nextDouble(); // rate of normal value for (int j=0; j<=32; ++j) { - tryDistribution(ForPostingsFormat.DEFAULT_BLOCK_SIZE,alpha,MASK[i],MASK[j]); + createDistribution(data,sz,alpha,MASK[i],MASK[j]); + tryCompressAndDecompress(data, sz); } } } - public void tryDistribution(int sz, double alpha, int masknorm, int maskexc) throws Exception { + public void createDistribution(int[] data, int sz, double alpha, int masknorm, int maskexc) { Integer[] buff= new Integer[sz]; - int[] data = new int[sz]; - byte[] res = new byte[4+sz*8]; // loosely upperbound - IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); int i=0; for (; i