mirror of https://github.com/apache/lucene.git
LUCENE-3892: add PForPostingsFormat
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1356531 13f79535-47bb-0310-9956-ffa450edef68
parent c272f90605
commit 5b4e1aea06
@@ -36,7 +36,7 @@ import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
  * The parts that really matter here are flushBlock() and readBlock()
  */

-public class ForFactory extends IntStreamFactory {
+public final class ForFactory extends IntStreamFactory {
   private final int blockSize;

   public ForFactory() {

@@ -42,7 +42,7 @@ import org.apache.lucene.codecs.sep.SepPostingsWriter;
  * to a PostingsWriter/ReaderBase, and get customized
  * format plugged.
  */
-public class ForPostingsFormat extends PostingsFormat {
+public final class ForPostingsFormat extends PostingsFormat {
   private final int blockSize;
   private final int minBlockSize;
   private final int maxBlockSize;

@@ -21,16 +21,15 @@ import java.nio.ByteBuffer;
 import java.util.Arrays;

 // Encode all values in normal area, based on the bit size for max value
-public final class ForUtil {
+public class ForUtil {
   public static final int HEADER_INT_SIZE=1;
-  private static final int[] MASK = { 0x00000000,
+  protected static final int[] MASK = { 0x00000000,
     0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
     0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
     0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
     0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
     0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
     0x7fffffff, 0xffffffff};
-  private static final int[] PER_EXCEPTION_SIZE = {1,2,4};

   public static int compress(final int[] data, int size, IntBuffer intBuffer) {
     int numBits=getNumBits(data,size);

@@ -51,40 +50,40 @@ public final class ForUtil {
     int numInts = (header & MASK[8]) + 1;
     int numBits = ((header >> 8) & MASK[5]) + 1;

-    // TODO: ForDecompressImpl is hardwired to size==128 only
+    // TODO: PackedIntsDecompress is hardwired to size==128 only
     switch(numBits) {
-      case 1: ForDecompressImpl.decode1(intBuffer, data); break;
-      case 2: ForDecompressImpl.decode2(intBuffer, data); break;
-      case 3: ForDecompressImpl.decode3(intBuffer, data); break;
-      case 4: ForDecompressImpl.decode4(intBuffer, data); break;
-      case 5: ForDecompressImpl.decode5(intBuffer, data); break;
-      case 6: ForDecompressImpl.decode6(intBuffer, data); break;
-      case 7: ForDecompressImpl.decode7(intBuffer, data); break;
-      case 8: ForDecompressImpl.decode8(intBuffer, data); break;
-      case 9: ForDecompressImpl.decode9(intBuffer, data); break;
-      case 10: ForDecompressImpl.decode10(intBuffer, data); break;
-      case 11: ForDecompressImpl.decode11(intBuffer, data); break;
-      case 12: ForDecompressImpl.decode12(intBuffer, data); break;
-      case 13: ForDecompressImpl.decode13(intBuffer, data); break;
-      case 14: ForDecompressImpl.decode14(intBuffer, data); break;
-      case 15: ForDecompressImpl.decode15(intBuffer, data); break;
-      case 16: ForDecompressImpl.decode16(intBuffer, data); break;
-      case 17: ForDecompressImpl.decode17(intBuffer, data); break;
-      case 18: ForDecompressImpl.decode18(intBuffer, data); break;
-      case 19: ForDecompressImpl.decode19(intBuffer, data); break;
-      case 20: ForDecompressImpl.decode20(intBuffer, data); break;
-      case 21: ForDecompressImpl.decode21(intBuffer, data); break;
-      case 22: ForDecompressImpl.decode22(intBuffer, data); break;
-      case 23: ForDecompressImpl.decode23(intBuffer, data); break;
-      case 24: ForDecompressImpl.decode24(intBuffer, data); break;
-      case 25: ForDecompressImpl.decode25(intBuffer, data); break;
-      case 26: ForDecompressImpl.decode26(intBuffer, data); break;
-      case 27: ForDecompressImpl.decode27(intBuffer, data); break;
-      case 28: ForDecompressImpl.decode28(intBuffer, data); break;
-      case 29: ForDecompressImpl.decode29(intBuffer, data); break;
-      case 30: ForDecompressImpl.decode30(intBuffer, data); break;
-      case 31: ForDecompressImpl.decode31(intBuffer, data); break;
-      case 32: ForDecompressImpl.decode32(intBuffer, data); break;
+      case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
+      case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
+      case 3: PackedIntsDecompress.decode3(intBuffer, data); break;
+      case 4: PackedIntsDecompress.decode4(intBuffer, data); break;
+      case 5: PackedIntsDecompress.decode5(intBuffer, data); break;
+      case 6: PackedIntsDecompress.decode6(intBuffer, data); break;
+      case 7: PackedIntsDecompress.decode7(intBuffer, data); break;
+      case 8: PackedIntsDecompress.decode8(intBuffer, data); break;
+      case 9: PackedIntsDecompress.decode9(intBuffer, data); break;
+      case 10: PackedIntsDecompress.decode10(intBuffer, data); break;
+      case 11: PackedIntsDecompress.decode11(intBuffer, data); break;
+      case 12: PackedIntsDecompress.decode12(intBuffer, data); break;
+      case 13: PackedIntsDecompress.decode13(intBuffer, data); break;
+      case 14: PackedIntsDecompress.decode14(intBuffer, data); break;
+      case 15: PackedIntsDecompress.decode15(intBuffer, data); break;
+      case 16: PackedIntsDecompress.decode16(intBuffer, data); break;
+      case 17: PackedIntsDecompress.decode17(intBuffer, data); break;
+      case 18: PackedIntsDecompress.decode18(intBuffer, data); break;
+      case 19: PackedIntsDecompress.decode19(intBuffer, data); break;
+      case 20: PackedIntsDecompress.decode20(intBuffer, data); break;
+      case 21: PackedIntsDecompress.decode21(intBuffer, data); break;
+      case 22: PackedIntsDecompress.decode22(intBuffer, data); break;
+      case 23: PackedIntsDecompress.decode23(intBuffer, data); break;
+      case 24: PackedIntsDecompress.decode24(intBuffer, data); break;
+      case 25: PackedIntsDecompress.decode25(intBuffer, data); break;
+      case 26: PackedIntsDecompress.decode26(intBuffer, data); break;
+      case 27: PackedIntsDecompress.decode27(intBuffer, data); break;
+      case 28: PackedIntsDecompress.decode28(intBuffer, data); break;
+      case 29: PackedIntsDecompress.decode29(intBuffer, data); break;
+      case 30: PackedIntsDecompress.decode30(intBuffer, data); break;
+      case 31: PackedIntsDecompress.decode31(intBuffer, data); break;
+      case 32: PackedIntsDecompress.decode32(intBuffer, data); break;
       default:
         throw new IllegalStateException("Unknown numFrameBits " + numBits);
     }

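For reference, the two fields read from the block header above pack the uncompressed int count and the frame bit width into a single int. A minimal sketch of that layout, inferred only from the decode lines in this hunk (the matching encode side is not shown here):

    // Inferred from the decode above: low 8 bits = numInts-1, next 5 bits = numBits-1.
    int numInts = 128, numBits = 7;
    int header = (numInts - 1) | ((numBits - 1) << 8);   // 0x067F
    assert ((header & 0xFF) + 1) == 128;                 // MASK[8] == 0xFF
    assert (((header >> 8) & 0x1F) + 1) == 7;            // MASK[5] == 0x1F
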
@@ -0,0 +1,114 @@
+package org.apache.lucene.codecs.pfor;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.codecs.sep.IntStreamFactory;
+import org.apache.lucene.codecs.sep.IntIndexInput;
+import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput;
+import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
+
+/**
+ * Stuff to pass to PostingsReader/WriterBase.
+ * The parts that really matter here are flushBlock() and readBlock()
+ */
+
+public final class PForFactory extends IntStreamFactory {
+  private final int blockSize;
+
+  public PForFactory() {
+    this.blockSize=PForPostingsFormat.DEFAULT_BLOCK_SIZE;
+  }
+
+  @Override
+  public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
+    IndexOutput out = dir.createOutput(fileName, context);
+    boolean success = false;
+    try {
+      FixedIntBlockIndexOutput ret = new PForIndexOutput(out, blockSize);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        // TODO: why handle the exception like this,
+        // and why not do the same on the read path?
+        IOUtils.closeWhileHandlingException(out);
+      }
+    }
+  }
+  @Override
+  public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
+    FixedIntBlockIndexInput ret = new PForIndexInput(dir.openInput(fileName, context));
+    return ret;
+  }
+
+  // wrap input and output with buffer support
+  private class PForIndexInput extends FixedIntBlockIndexInput {
+    PForIndexInput(final IndexInput in) throws IOException {
+      super(in);
+    }
+    class PForBlockReader implements FixedIntBlockIndexInput.BlockReader {
+      byte[] encoded;
+      int[] buffer;
+      IndexInput in;
+      IntBuffer encodedBuffer;
+      PForBlockReader(final IndexInput in, final int[] buffer) {
+        this.encoded = new byte[blockSize*8+4];
+        this.in=in;
+        this.buffer=buffer;
+        this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
+      }
+      public void seek(long pos) {}
+      // TODO: implement public void skipBlock() {} ?
+      public void readBlock() throws IOException {
+        final int numBytes = in.readInt();
+        assert numBytes <= blockSize*8+4;
+        in.readBytes(encoded,0,numBytes);
+        PForUtil.decompress(encodedBuffer,buffer);
+      }
+    }
+    @Override
+    protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
+      return new PForBlockReader(in,buffer);
+    }
+  }
+
+  private class PForIndexOutput extends FixedIntBlockIndexOutput {
+    private byte[] encoded;
+    private IntBuffer encodedBuffer;
+    PForIndexOutput(IndexOutput out, int blockSize) throws IOException {
+      super(out,blockSize);
+      this.encoded = new byte[blockSize*8+4];
+      this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
+    }
+    @Override
+    protected void flushBlock() throws IOException {
+      final int numBytes = PForUtil.compress(buffer,buffer.length,encodedBuffer);
+      out.writeInt(numBytes);
+      out.writeBytes(encoded, numBytes);
+    }
+  }
+}

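The writer and reader above frame every fixed-size block as a 4-byte length followed by the compressed bytes: flushBlock() calls writeInt(numBytes) and writeBytes(encoded, numBytes), and readBlock() reverses that before handing the IntBuffer view to PForUtil.decompress(). A rough sketch of the same round trip using PForUtil directly, with the worst-case buffer sizing used above (blockSize*8+4); the sample values and the java.nio imports are assumptions for illustration only:

    int blockSize = 128;
    int[] block = new int[blockSize];
    for (int i = 0; i < blockSize; i++) {
      block[i] = i % 31;                 // mostly small values, few frame bits needed
    }
    block[77] = 1 << 20;                 // one large value becomes a patched exception

    byte[] encoded = new byte[blockSize * 8 + 4];
    IntBuffer encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
    int numBytes = PForUtil.compress(block, blockSize, encodedBuffer);
    // on disk this is framed as: out.writeInt(numBytes); out.writeBytes(encoded, numBytes);

    int[] restored = new int[blockSize];
    PForUtil.decompress(encodedBuffer, restored);  // decompress() rewinds the buffer itself
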
@@ -0,0 +1,117 @@
+package org.apache.lucene.codecs.pfor;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Set;
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.BlockTreeTermsWriter;
+import org.apache.lucene.codecs.BlockTreeTermsReader;
+import org.apache.lucene.codecs.TermsIndexReaderBase;
+import org.apache.lucene.codecs.TermsIndexWriterBase;
+import org.apache.lucene.codecs.FixedGapTermsIndexReader;
+import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.sep.SepPostingsReader;
+import org.apache.lucene.codecs.sep.SepPostingsWriter;
+/**
+ * This class only passes a PForFactory to the
+ * PostingsWriter/ReaderBase, so that the customized
+ * block format gets plugged in.
+ */
+public final class PForPostingsFormat extends PostingsFormat {
+  private final int blockSize;
+  private final int minBlockSize;
+  private final int maxBlockSize;
+  protected final static int DEFAULT_BLOCK_SIZE = 128;
+  protected final static int DEFAULT_TERM_CACHED_SIZE = 1024;
+
+  public PForPostingsFormat() {
+    super("PFor");
+    this.blockSize = DEFAULT_BLOCK_SIZE;
+    this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
+    this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
+  }
+  public PForPostingsFormat(int minBlockSize, int maxBlockSize) {
+    super("PFor");
+    this.blockSize = DEFAULT_BLOCK_SIZE;
+    this.minBlockSize = minBlockSize;
+    assert minBlockSize > 1;
+    this.maxBlockSize = maxBlockSize;
+    assert minBlockSize <= maxBlockSize;
+  }
+
+  @Override
+  public String toString() {
+    return getName() + "(blocksize=" + blockSize + ")";
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    // TODO: implement a new PostingsWriterBase to improve skip-settings
+    PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new PForFactory());
+    boolean success = false;
+    try {
+      FieldsConsumer ret = new BlockTreeTermsWriter(state,
+                                                    postingsWriter,
+                                                    minBlockSize,
+                                                    maxBlockSize);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        postingsWriter.close();
+      }
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postingsReader = new SepPostingsReader(state.dir,
+                                                              state.fieldInfos,
+                                                              state.segmentInfo,
+                                                              state.context,
+                                                              new PForFactory(),
+                                                              state.segmentSuffix);
+
+    boolean success = false;
+    try {
+      FieldsProducer ret = new BlockTreeTermsReader(state.dir,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    postingsReader,
+                                                    state.context,
+                                                    state.segmentSuffix,
+                                                    state.termsIndexDivisor);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        postingsReader.close();
+      }
+    }
+  }
+}

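Because the constructor registers the name "PFor" (and the SPI file later in this commit lists the class), the format can be selected per field through the usual codec hooks. A hedged usage sketch, assuming the stock Lucene 4.0 Lucene40Codec.getPostingsFormatForField override and pre-existing dir/analyzer variables; none of this is part of the commit itself:

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    iwc.setCodec(new Lucene40Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return PostingsFormat.forName("PFor");  // resolves to PForPostingsFormat via SPI
      }
    });
    IndexWriter writer = new IndexWriter(dir, iwc);
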
@@ -0,0 +1,308 @@
+package org.apache.lucene.codecs.pfor;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.nio.IntBuffer;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+// Encode all small values and exception pointers in normal area,
+// Encode large values in exception area.
+// Size per exception is variable, possibly: 1byte, 2bytes, or 4bytes
+public final class PForUtil extends ForUtil {
+  protected static final int[] PER_EXCEPTION_SIZE = {1,2,4};
+
+  public static int compress(final int[] data, int size, IntBuffer intBuffer) {
+    int numBits=getNumBits(data,size);
+
+    int[] excValues = new int[size];
+    int excNum = 0, excLastPos = -1, excFirstPos = -1;
+    int excLastNonForcePos = -1;
+    int excNumBase = 0; // number of exceptions up to the last non-forced exception
+    int excBytes = 1; // bytes per exception
+    int excByteOffset = 0; // bytes of preceding data, i.e. header and normal area
+    long maxChain = (1<<8) - 2; // header bits limit this to 254
+    boolean conValue, conForce, conEnd;
+    int i=0;
+
+    // estimate exceptions
+    for (i=0; i<size; ++i) {
+      conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
+      conForce = (i >= maxChain + excLastPos); // force exception
+      if (conValue || conForce) {
+        excValues[excNum++] = data[i];
+        if (excLastPos == -1) {
+          maxChain = 1L<<numBits;
+          excFirstPos = i;
+        }
+        if (conValue) {
+          excLastNonForcePos = i;
+          excNumBase = excNum;
+        }
+        excLastPos = i;
+      }
+    }
+
+    // encode normal area, record exception positions
+    i=0;
+    excNum = 0;
+    if (excFirstPos < 0) { // no exception
+      for (; i<size; ++i) {
+        encodeNormalValue(intBuffer,i,data[i], numBits);
+      }
+      excLastPos = -1;
+    } else {
+      for (; i<excFirstPos; ++i) {
+        encodeNormalValue(intBuffer,i,data[i], numBits);
+      }
+      maxChain = 1L<<numBits;
+      excLastPos = -1;
+      for (; i<size; ++i) {
+        conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
+        conForce = (i >= maxChain + excLastPos); // force exception
+        conEnd = (excNum == excNumBase); // following forced exceptions are ignored
+        if ((!conValue && !conForce) || conEnd) {
+          encodeNormalValue(intBuffer,i,data[i], numBits);
+        } else {
+          if (excLastPos >= 0) {
+            encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits);
+          }
+          excNum++;
+          excLastPos = i;
+        }
+      }
+      if (excLastPos >= 0) {
+        encodeNormalValue(intBuffer, excLastPos, (i-excLastPos-1)&MASK[numBits], numBits); // mask out suppressed force exception
+      }
+    }
+
+    // encode exception area
+    i=0;
+    for (; i<excNum; ++i) {
+      if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
+        excBytes=2;
+      }
+      if (excBytes < 4 && (excValues[i] & ~MASK[16]) != 0) {
+        excBytes=4;
+      }
+    }
+    excByteOffset = HEADER_INT_SIZE*4 + (size*numBits + 7)/8;
+    encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
+
+    // encode header
+    encodeHeader(intBuffer, size, numBits, excNum, excFirstPos, excBytes);
+
+    return (excByteOffset + excBytes*excNum + 3)/4*4;
+  }
+
+  public static int decompress(IntBuffer intBuffer, int[] data) {
+    intBuffer.rewind();
+    int header = intBuffer.get();
+
+    int numInts = (header & MASK[8]) + 1;
+    int excNum = ((header >> 8) & MASK[8]) + 1;
+    int excFirstPos = ((header >> 16) & MASK[8]) - 1;
+    int excBytes = PER_EXCEPTION_SIZE[(header >> 29) & MASK[2]];
+    int numBits = ((header >> 24) & MASK[5]) + 1;
+
+    // TODO: PackedIntsDecompress is hardwired to size==128 only
+    switch(numBits) {
+      case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
+      case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
+      case 3: PackedIntsDecompress.decode3(intBuffer, data); break;
+      case 4: PackedIntsDecompress.decode4(intBuffer, data); break;
+      case 5: PackedIntsDecompress.decode5(intBuffer, data); break;
+      case 6: PackedIntsDecompress.decode6(intBuffer, data); break;
+      case 7: PackedIntsDecompress.decode7(intBuffer, data); break;
+      case 8: PackedIntsDecompress.decode8(intBuffer, data); break;
+      case 9: PackedIntsDecompress.decode9(intBuffer, data); break;
+      case 10: PackedIntsDecompress.decode10(intBuffer, data); break;
+      case 11: PackedIntsDecompress.decode11(intBuffer, data); break;
+      case 12: PackedIntsDecompress.decode12(intBuffer, data); break;
+      case 13: PackedIntsDecompress.decode13(intBuffer, data); break;
+      case 14: PackedIntsDecompress.decode14(intBuffer, data); break;
+      case 15: PackedIntsDecompress.decode15(intBuffer, data); break;
+      case 16: PackedIntsDecompress.decode16(intBuffer, data); break;
+      case 17: PackedIntsDecompress.decode17(intBuffer, data); break;
+      case 18: PackedIntsDecompress.decode18(intBuffer, data); break;
+      case 19: PackedIntsDecompress.decode19(intBuffer, data); break;
+      case 20: PackedIntsDecompress.decode20(intBuffer, data); break;
+      case 21: PackedIntsDecompress.decode21(intBuffer, data); break;
+      case 22: PackedIntsDecompress.decode22(intBuffer, data); break;
+      case 23: PackedIntsDecompress.decode23(intBuffer, data); break;
+      case 24: PackedIntsDecompress.decode24(intBuffer, data); break;
+      case 25: PackedIntsDecompress.decode25(intBuffer, data); break;
+      case 26: PackedIntsDecompress.decode26(intBuffer, data); break;
+      case 27: PackedIntsDecompress.decode27(intBuffer, data); break;
+      case 28: PackedIntsDecompress.decode28(intBuffer, data); break;
+      case 29: PackedIntsDecompress.decode29(intBuffer, data); break;
+      case 30: PackedIntsDecompress.decode30(intBuffer, data); break;
+      case 31: PackedIntsDecompress.decode31(intBuffer, data); break;
+      case 32: PackedIntsDecompress.decode32(intBuffer, data); break;
+      default:
+        throw new IllegalStateException("Unknown numFrameBits " + numBits);
+    }
+    patchException(intBuffer,data,excNum,excFirstPos,excBytes);
+    return numInts;
+  }
+
+  static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
+    int header = getHeader(numInts,numBits,excNum,excFirstPos,excBytes);
+    intBuffer.put(0, header);
+  }
+
+  static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) {
+    if (num == 0)
+      return;
+    if (perbytes == 1) {
+      int curBytePos = byteOffset;
+      for (int i=0; i<num; ++i) {
+        int curIntPos = curBytePos / 4;
+        setBufferIntBits(intBuffer, curIntPos, (curBytePos & 3)*8, 8, values[i]);
+        curBytePos++;
+      }
+    } else if (perbytes == 2) {
+      int shortOffset = (byteOffset+1)/2;
+      int curIntPos = shortOffset/2;
+      int i=0;
+      if ((shortOffset & 1) == 1) { // write the leading short separately so the rest aligns on int boundaries
+        setBufferIntBits(intBuffer, curIntPos++, 16, 16, values[i++]);
+      }
+      for (; i<num-1; i+=2) {
+        intBuffer.put(curIntPos++, (values[i+1]<<16) | values[i]);
+      }
+      if (i<num) {
+        intBuffer.put(curIntPos, values[i]); // cut tail, also clear high 16 bits
+      }
+    } else if (perbytes == 4) {
+      int curIntPos = (byteOffset+3) / 4;
+      for (int i=0; i<num; ++i) {
+        intBuffer.put(curIntPos++, values[i]);
+      }
+    }
+  }
+
+  // TODO: since numInts == 128, we don't need to rewind intBuffer.
+  // However, the tail of the normal area may share an int with the head of the exception area,
+  // which means patchException may lose leading exceptions.
+  public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
+    if (excFirstPos == -1) {
+      return;
+    }
+    int curPos=excFirstPos;
+    int i,j;
+
+    if (excBytes == 1) {
+      for (i=0; i+3<excNum; i+=4) {
+        final int curInt = intBuffer.get();
+        curPos = patch(data, curPos, (curInt) & MASK[8]);
+        curPos = patch(data, curPos, (curInt >>> 8) & MASK[8]);
+        curPos = patch(data, curPos, (curInt >>> 16) & MASK[8]);
+        curPos = patch(data, curPos, (curInt >>> 24) & MASK[8]);
+      }
+      if (i<excNum) {
+        final int curInt = intBuffer.get();
+        for (j=0; j<32 && i<excNum; j+=8,i++) {
+          curPos = patch(data, curPos, (curInt >>> j) & MASK[8]);
+        }
+      }
+    } else if (excBytes == 2) {
+      for (i=0; i+1<excNum; i+=2) {
+        final int curInt = intBuffer.get();
+        curPos = patch(data, curPos, (curInt) & MASK[16]);
+        curPos = patch(data, curPos, (curInt >>> 16) & MASK[16]);
+      }
+      if (i<excNum) {
+        final int curInt = intBuffer.get();
+        curPos = patch(data, curPos, (curInt) & MASK[16]);
+      }
+    } else if (excBytes == 4) {
+      for (i=0; i<excNum; i++) {
+        curPos = patch(data, curPos, intBuffer.get());
+      }
+    }
+  }
+
+  static int patch(int[]data, int pos, int value) {
+    int nextPos = data[pos] + pos + 1;
+    data[pos] = value;
+    assert nextPos > pos;
+    return nextPos;
+  }
+
+  // TODO: shall we use 32 numBits directly if it exceeds 28 bits?
+  static int getNumBits(final int[] data, int size) {
+    int optBits=1;
+    int optSize=estimateCompressedSize(data,size,1);
+    for (int i=2; i<=32; ++i) {
+      int curSize=estimateCompressedSize(data,size,i);
+      if (curSize<optSize) {
+        optSize=curSize;
+        optBits=i;
+      }
+    }
+    return optBits;
+  }
+
+  // loosely estimate the int size of each compressed block, based on the frame bit width numBits;
+  // force exceptions are ignored
+  static int estimateCompressedSize(final int[] data, int size, int numBits) {
+    int totalBytes=(numBits*size+7)/8; // always round to byte
+    int excNum=0;
+    int curExcBytes=1;
+    for (int i=0; i<size; ++i) {
+      if ((data[i] & ~MASK[numBits]) != 0) { // exception
+        excNum++;
+        if (curExcBytes<2 && (data[i] & ~MASK[8]) != 0) { // exceeds 1-byte exception
+          curExcBytes=2;
+        }
+        if (curExcBytes<4 && (data[i] & ~MASK[16]) != 0) { // exceeds 2-byte exception
+          curExcBytes=4;
+        }
+      }
+    }
+    if (curExcBytes==2) {
+      totalBytes=((totalBytes+1)/2)*2; // round up to 2x bytes before filling exceptions
+    }
+    else if (curExcBytes==4) {
+      totalBytes=((totalBytes+3)/4)*4; // round up to 4x bytes
+    }
+    totalBytes+=excNum*curExcBytes;
+
+    return totalBytes/4*4+HEADER_INT_SIZE; // round up to ints
+  }
+  /** The 4 byte header (32 bits) contains (from lsb to msb):
+   *
+   * - 8 bits for the uncompressed int count - 1 (actually uses up to 7 bits, i.e. 128)
+   *
+   * - 8 bits for the exception count - 1 (when there are no exceptions, this is undefined)
+   *
+   * - 8 bits for the index of the first exception + 1 (when there is no exception, this is 0)
+   *
+   * - 5 bits for the number of frame bits - 1
+   * - 2 bits for the exception code: 00: byte, 01: short, 10: int
+   * - 1 bit unused
+   *
+   */
+  static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
+    return (numInts-1)
+           | (((excNum-1) & MASK[8]) << 8)
+           | ((excFirstPos+1) << 16)
+           | ((numBits-1) << 24)
+           | ((excBytes/2) << 29);
+  }
+}

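To make the header layout above concrete, one worked example (the field values are arbitrary, but the arithmetic follows getHeader() and the decode lines in decompress()):

    // numInts=128, numBits=5, excNum=3, excFirstPos=10, excBytes=2 (short exceptions)
    int header = getHeader(128, 5, 3, 10, 2);
    // = (128-1) | ((3-1) << 8) | ((10+1) << 16) | ((5-1) << 24) | ((2/2) << 29)
    // = 0x0000007F | 0x00000200 | 0x000B0000 | 0x04000000 | 0x20000000
    // = 0x240B027F
    // decompress() then recovers:
    //   numInts     = (header & MASK[8]) + 1                       -> 128
    //   excNum      = ((header >> 8) & MASK[8]) + 1                -> 3
    //   excFirstPos = ((header >> 16) & MASK[8]) - 1               -> 10
    //   numBits     = ((header >> 24) & MASK[5]) + 1               -> 5
    //   excBytes    = PER_EXCEPTION_SIZE[(header >> 29) & MASK[2]] -> 2
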
@@ -19,7 +19,7 @@ package org.apache.lucene.codecs.pfor;

 import java.nio.IntBuffer;

-final class ForDecompressImpl {
+final class PackedIntsDecompress {

   // nocommit: assess perf of this to see if specializing is really needed

@@ -21,7 +21,6 @@ Generate source code for java classes for FOR decompression.
"""

USE_SCRATCH = False
#USE_SCRATCH = True

def bitsExpr(i, numFrameBits):
  framePos = i * numFrameBits

@@ -51,7 +50,7 @@ def bitsExpr(i, numFrameBits):


 def genDecompress():
-  className = "ForDecompressImpl"
+  className = "PackedIntsDecompress"
   fileName = className + ".java"
   imports = "import java.nio.IntBuffer;\n"
   f = open(fileName, 'w')

@@ -80,7 +79,7 @@ def genDecompress():

   w("import java.nio.IntBuffer;\n\n")

-  w("final class ForDecompressImpl {\n")
+  w("final class PackedIntsDecompress {\n")

   w('\n  // nocommit: assess perf of this to see if specializing is really needed\n')

@@ -118,7 +117,7 @@ def genDecompress():

 def genSwitch():
   for numFrameBits in xrange(1, 33):
-    print ' case %d: ForDecompressImpl.decode%d(compressedBuffer, encoded); break;' % (numFrameBits, numFrameBits)
+    print ' case %d: PackedIntsDecompress.decode%d(compressedBuffer, encoded); break;' % (numFrameBits, numFrameBits)

 if __name__ == "__main__":
   genDecompress()

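For orientation, each generated decodeN method unrolls the same basic operation for one fixed bit width: extract the i-th numFrameBits-wide field from the packed ints. A generic, non-unrolled sketch of that operation; it assumes low-order-bits-first packing and is an illustration, not the generated code:

    static int decodeAt(int[] packed, int i, int numFrameBits) {
      long bitPos = (long) i * numFrameBits;   // framePos in bitsExpr() above
      int intPos = (int) (bitPos >>> 5);       // packed int the field starts in
      int bitOff = (int) (bitPos & 31);        // bit offset inside that int
      long lo = packed[intPos] & 0xFFFFFFFFL;
      long hi = (intPos + 1 < packed.length) ? (packed[intPos + 1] & 0xFFFFFFFFL) : 0L;
      long window = lo | (hi << 32);           // the field may straddle two ints
      return (int) ((window >>> bitOff) & ((1L << numFrameBits) - 1));
    }
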
@@ -18,3 +18,4 @@ org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
 org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
 org.apache.lucene.codecs.memory.MemoryPostingsFormat
 org.apache.lucene.codecs.pfor.ForPostingsFormat
+org.apache.lucene.codecs.pfor.PForPostingsFormat

@@ -23,7 +23,7 @@ import java.nio.*;
 import org.apache.lucene.codecs.pfor.*;
 import org.apache.lucene.util.LuceneTestCase;

-public class TestForUtil extends LuceneTestCase {
+public class TestPForUtil extends LuceneTestCase {
   static final int[] MASK={ 0x00000000,
     0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
     0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,

@@ -53,25 +53,25 @@ public class TestForUtil extends LuceneTestCase {
     byte[] res = new byte[4+sz*8];
     IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
     for (int i=0; i<sz-1; ++i)
-      buff[i]=gen.nextInt() & 0;
+      buff[i]=gen.nextInt() & 1;
     buff[sz-1]=gen.nextInt() & 0xffffffff; // create only one exception

     Collections.shuffle(Arrays.asList(buff),new Random(seed));
     for (int i=0; i<sz; ++i)
       data[i] = buff[i];

-    int ensz = ForUtil.compress(data,sz,resBuffer);
+    int ensz = PForUtil.compress(data,sz,resBuffer);

     if (ensz > sz*8+4) {
       println("Exceed? "+ensz+">"+(sz*8+4));
       ensz=sz*8+4;
     }
     resBuffer.rewind();
-    ForUtil.decompress(resBuffer,copy);
+    PForUtil.decompress(resBuffer,copy);

-    // println(getHex(data,sz)+"\n");
-    // println(getHex(res,ensz)+"\n");
-    // println(getHex(copy,sz)+"\n");
+    //println(getHex(data,sz)+"\n");
+    //println(getHex(res,ensz)+"\n");
+    //println(getHex(copy,sz)+"\n");

     assert cmp(data,sz,copy,sz)==true;
   }

@@ -99,15 +99,14 @@ public class TestForUtil extends LuceneTestCase {
     for (i=0; i<sz; ++i)
       data[i] = buff[i];

-    int ensz = ForUtil.compress(data,sz,resBuffer);
+    int ensz = PForUtil.compress(data,sz,resBuffer);

     if (ensz > sz*8+4) {
       println("Exceed? "+ensz+">"+(sz*8+4));
       ensz=sz*8+4;
     }
     int[] copy = new int[sz];

-    ForUtil.decompress(resBuffer,copy);
+    PForUtil.decompress(resBuffer,copy);

     // println(getHex(data,sz)+"\n");
     // println(getHex(res,ensz)+"\n");

@@ -76,7 +76,7 @@ import org.junit.BeforeClass;
 // we won't even be running the actual code, only the impostor
 // @SuppressCodecs("Lucene4x")
 // Sep codec cannot yet handle the offsets in our 4.x index!
-@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For"})
+@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For","PFor"})
 public class TestBackwardsCompatibility extends LuceneTestCase {

   // Uncomment these cases & run them on an older Lucene

@@ -49,7 +49,7 @@ import org.apache.lucene.util._TestUtil;
 // TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs.
 // not all codecs store prx separate...
 // TODO: fix sep codec to index offsets so we can greatly reduce this list!
-@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For"})
+@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For","PFor"})
 public class TestPostingsOffsets extends LuceneTestCase {
   IndexWriterConfig iwc;


@@ -487,6 +487,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     add("MockSep");
     add("MockRandom");
     add("For");
+    add("PFor");
   }};

   private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) throws IOException {

@@ -95,6 +95,7 @@ public class MockRandomPostingsFormat extends PostingsFormat {
     delegates.add(new MockVariableIntBlockPostingsFormat.MockIntFactory(baseBlockSize));
     // TODO: others
     delegates.add(new ForFactory());
+    delegates.add(new PForFactory());
   }

   private static String getExtension(String fileName) {