LUCENE-3892: some cleanup/refactoring

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1369697 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-08-05 22:39:32 +00:00
parent aed1b5d760
commit de9586104c
6 changed files with 199 additions and 210 deletions

View File

@ -31,8 +31,8 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
* Pass ForFactory to a PostingsWriter/ReaderBase, and get
* customized postings format plugged.
* Encodes/decodes postings in packed int blocks for faster
* decoding.
*/
public final class BlockPostingsFormat extends PostingsFormat {
public static final String DOC_EXTENSION = "doc";
@ -41,7 +41,8 @@ public final class BlockPostingsFormat extends PostingsFormat {
private final int minTermBlockSize;
private final int maxTermBlockSize;
public final static int DEFAULT_BLOCK_SIZE = 128;
public final static int BLOCK_SIZE = 128;
public BlockPostingsFormat() {
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
@ -57,13 +58,12 @@ public final class BlockPostingsFormat extends PostingsFormat {
@Override
public String toString() {
return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")";
return getName() + "(blocksize=" + BLOCK_SIZE + ")";
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
// TODO: implement a new PostingsWriterBase to improve skip-settings
PostingsWriterBase postingsWriter = new BlockPostingsWriter(state, 128);
PostingsWriterBase postingsWriter = new BlockPostingsWriter(state);
boolean success = false;
try {
@ -86,8 +86,7 @@ public final class BlockPostingsFormat extends PostingsFormat {
state.fieldInfos,
state.segmentInfo,
state.context,
state.segmentSuffix,
128);
state.segmentSuffix);
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(state.dir,

View File

@ -43,7 +43,14 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
// nocommit javadocs
/**
* Concrete class that reads the docId (and maybe freq, pos, offset, payload)
* lists written in the block postings format.
*
* @see BlockSkipReader for details
*
*/
public final class BlockPostingsReader extends PostingsReaderBase {
private final IndexInput docIn;
@ -56,9 +63,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
final String segment;
// NOTE: not private to avoid access$NNN methods:
final int blockSize;
final static int blockSize = BlockPostingsFormat.BLOCK_SIZE;
public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix, int blockSize) throws IOException {
public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
boolean success = false;
segment = segmentInfo.name;
IndexInput docIn = null;
@ -99,8 +106,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
IOUtils.closeWhileHandlingException(docIn, posIn, payIn);
}
}
this.blockSize = blockSize;
}
@Override
@ -116,6 +121,24 @@ public final class BlockPostingsReader extends PostingsReaderBase {
}
}
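// Decodes the final partial block of a postings list: when fewer than a
// full block of docs remain, doc deltas (and freqs) are stored as vInts
// rather than as a packed int block.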
static void readVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
if (indexHasFreq) {
for(int i=0;i<num;i++) {
final int code = docIn.readVInt();
docBuffer[i] = code >>> 1;
if ((code & 1) != 0) {
freqBuffer[i] = 1;
} else {
freqBuffer[i] = docIn.readVInt();
}
}
} else {
for(int i=0;i<num;i++) {
docBuffer[i] = docIn.readVInt();
}
}
}
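For reference, a hedged sketch of the matching writer side of this vInt tail encoding (the committed encoder lives in BlockPostingsWriter's finishTerm; writeVIntBlock is a made-up name for illustration, assuming org.apache.lucene.store.IndexOutput):

  // Illustration only: mirrors what readVIntBlock above expects.
  static void writeVIntBlock(IndexOutput docOut, int[] docDeltas, int[] freqs,
                             int num, boolean indexHasFreq) throws IOException {
    if (indexHasFreq) {
      for(int i=0;i<num;i++) {
        if (freqs[i] == 1) {
          // low bit set means freq == 1, so the separate freq vInt is omitted
          docOut.writeVInt((docDeltas[i] << 1) | 1);
        } else {
          docOut.writeVInt(docDeltas[i] << 1);
          docOut.writeVInt(freqs[i]);
        }
      }
    } else {
      for(int i=0;i<num;i++) {
        docOut.writeVInt(docDeltas[i]);
      }
    }
  }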
static void readBlock(IndexInput in, byte[] encoded, IntBuffer encodedBuffer, int[] buffer) throws IOException {
int header = in.readVInt();
in.readBytes(encoded, 0, ForUtil.getEncodedSize(header));
@ -343,7 +366,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
encoded = new byte[blockSize*4 + 4];
encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
}
@ -386,6 +409,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
}
private void refillDocs() throws IOException {
//System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
@ -394,7 +418,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
}
readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer);
if (indexHasFreq) {
if (DEBUG) {
System.out.println(" fill freq block from fp=" + docIn.getFilePointer());
@ -402,50 +425,33 @@ public final class BlockPostingsReader extends PostingsReaderBase {
readBlock(docIn, encoded, encodedBuffer, freqBuffer);
}
} else {
// Read vInts:
if (DEBUG) {
System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer());
}
for(int i=0;i<left;i++) {
final int code = docIn.readVInt();
if (indexHasFreq) {
docDeltaBuffer[i] = code >>> 1;
if ((code & 1) != 0) {
freqBuffer[i] = 1;
} else {
freqBuffer[i] = docIn.readVInt();
}
} else {
docDeltaBuffer[i] = code;
}
}
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
}
docBufferUpto = 0;
}
@Override
public int nextDoc() throws IOException {
if (DEBUG) {
System.out.println("\nFPR.nextDoc");
}
while (true) {
if (DEBUG) {
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
}
if (docUpto == docFreq) {
if (DEBUG) {
System.out.println(" return doc=END");
}
return doc = NO_MORE_DOCS;
}
//System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == blockSize) {
refillDocs();
}
if (DEBUG) {
System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]);
}
@ -461,23 +467,19 @@ public final class BlockPostingsReader extends PostingsReaderBase {
}
return doc;
}
if (DEBUG) {
System.out.println(" doc=" + accum + " is deleted; try next doc");
}
docBufferUpto++;
}
}
@Override
public int advance(int target) throws IOException {
// nocommit make frq block load lazy/skippable
// nocommit 2 is heuristic guess!!
// nocommit put cheating back! does it help?
// nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) {
if (docFreq > blockSize && target - accum > blockSize) {
if (DEBUG) {
@ -506,18 +508,16 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
}
assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto;
docUpto = newDocUpto+1;
// Force block read next:
// Force the next block to be read
docBufferUpto = blockSize;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
accum = skipper.getDoc(); // actually, this is just lastSkipEntry
docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
}
}
@ -530,11 +530,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return doc;
}
}
if (DEBUG) {
System.out.println(" advance return doc=END");
}
return NO_MORE_DOCS;
}
}
@ -604,7 +602,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
this.startDocIn = BlockPostingsReader.this.docIn;
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone();
encoded = new byte[blockSize*4 + 4];
encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
@ -656,35 +654,23 @@ public final class BlockPostingsReader extends PostingsReaderBase {
}
private void refillDocs() throws IOException {
//System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
if (left >= blockSize) {
if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
}
readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer);
if (DEBUG) {
System.out.println(" fill freq block from fp=" + docIn.getFilePointer());
}
readBlock(docIn, encoded, encodedBuffer, freqBuffer);
} else {
// Read vInts:
if (DEBUG) {
System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
}
for(int i=0;i<left;i++) {
final int code = docIn.readVInt();
docDeltaBuffer[i] = code >>> 1;
if ((code & 1) != 0) {
freqBuffer[i] = 1;
} else {
freqBuffer[i] = docIn.readVInt();
}
}
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
docBufferUpto = 0;
}
@ -712,7 +698,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} else {
posDeltaBuffer[i] = code;
}
if (indexHasOffsets) {
posIn.readVInt();
posIn.readVInt();
@ -728,24 +713,20 @@ public final class BlockPostingsReader extends PostingsReaderBase {
@Override
public int nextDoc() throws IOException {
if (DEBUG) {
System.out.println(" FPR.nextDoc");
}
while (true) {
if (DEBUG) {
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
}
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
//System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == blockSize) {
refillDocs();
}
if (DEBUG) {
System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]);
}
@ -757,13 +738,12 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (liveDocs == null || liveDocs.get(accum)) {
doc = accum;
position = 0;
if (DEBUG) {
System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
}
position = 0;
return doc;
}
if (DEBUG) {
System.out.println(" doc=" + accum + " is deleted; try next doc");
}
@ -782,11 +762,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) {
if (docFreq > blockSize && target - accum > blockSize) {
if (DEBUG) {
System.out.println(" try skipper");
}
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
if (DEBUG) {
@ -815,7 +793,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
}
@ -823,7 +800,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto;
docUpto = newDocUpto+1;
// Force block read next:
// Force the next block to be read
docBufferUpto = blockSize;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
@ -1024,7 +1001,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone();
this.payIn = (IndexInput) BlockPostingsReader.this.payIn.clone();
encoded = new byte[blockSize*4 + 4];
encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (indexHasOffsets) {
@ -1096,6 +1073,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
}
private void refillDocs() throws IOException {
//System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
@ -1103,28 +1081,16 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
}
readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer);
if (DEBUG) {
System.out.println(" fill freq block from fp=" + docIn.getFilePointer());
}
readBlock(docIn, encoded, encodedBuffer, freqBuffer);
} else {
// Read vInts:
if (DEBUG) {
System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
}
for(int i=0;i<left;i++) {
final int code = docIn.readVInt();
docDeltaBuffer[i] = code >>> 1;
if ((code & 1) != 0) {
freqBuffer[i] = 1;
} else {
freqBuffer[i] = docIn.readVInt();
}
}
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
docBufferUpto = 0;
}
@ -1209,29 +1175,24 @@ public final class BlockPostingsReader extends PostingsReaderBase {
@Override
public int nextDoc() throws IOException {
if (DEBUG) {
System.out.println(" FPR.nextDoc");
}
if (indexHasPayloads) {
payloadByteUpto += payloadLength;
payloadLength = 0;
}
while (true) {
if (DEBUG) {
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
}
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
//System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == blockSize) {
refillDocs();
}
if (DEBUG) {
System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]);
}
@ -1251,7 +1212,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
lastStartOffset = 0;
return doc;
}
if (DEBUG) {
System.out.println(" doc=" + accum + " is deleted; try next doc");
}
@ -1303,15 +1263,13 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
}
assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto;
docUpto = newDocUpto+1;
// Force block read next:
// Force the next block to be read
docBufferUpto = blockSize;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());

View File

@ -37,8 +37,15 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
// nocommit javadocs
/**
* Concrete class that writes the docId (and maybe freq, pos, offset, payload)
* lists in the block postings format.
*
* The postings list for each term is stored separately.
*
* @see BlockSkipWriter for details about skip settings and the postings layout.
*
*/
public final class BlockPostingsWriter extends PostingsWriterBase {
private boolean DEBUG = BlockPostingsReader.DEBUG;
@ -60,9 +67,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
final IndexOutput posOut;
final IndexOutput payOut;
static final int DEFAULT_BLOCK_SIZE = 128;
final int blockSize;
final static int blockSize = BlockPostingsFormat.BLOCK_SIZE;
private IndexOutput termsOut;
@ -91,12 +96,12 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
private int payloadByteUpto;
private int lastBlockDocID;
private boolean saveNextPosBlock;
private long lastBlockPosFP;
private long lastBlockPayFP;
private int lastBlockPosBufferUpto;
private int lastBlockStartOffset;
private int lastBlockPayloadByteUpto;
private int lastDocID;
private int lastPosition;
private int lastStartOffset;
@ -107,9 +112,8 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
private final BlockSkipWriter skipWriter;
public BlockPostingsWriter(SegmentWriteState state, int blockSize) throws IOException {
public BlockPostingsWriter(SegmentWriteState state) throws IOException {
super();
this.blockSize = blockSize;
docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.DOC_EXTENSION),
state.context);
@ -164,14 +168,14 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
docDeltaBuffer = new int[blockSize];
freqBuffer = new int[blockSize];
skipWriter = new BlockSkipWriter(blockSize,
maxSkipLevels,
skipWriter = new BlockSkipWriter(maxSkipLevels,
blockSize,
state.segmentInfo.getDocCount(),
docOut,
posOut,
payOut);
encoded = new byte[blockSize*4 + 4];
encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
}
@ -201,8 +205,8 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
payTermStartFP = payOut.getFilePointer();
}
}
lastBlockDocID = -1;
lastDocID = 0;
lastBlockDocID = -1;
if (DEBUG) {
System.out.println("FPW.startTerm startFP=" + docTermStartFP);
}
@ -211,7 +215,6 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
private void writeBlock(int[] buffer, IndexOutput out) throws IOException {
final int header = ForUtil.compress(buffer, encodedBuffer);
//System.out.println(" block has " + numBytes + " bytes");
out.writeVInt(header);
out.writeBytes(encoded, ForUtil.getEncodedSize(header));
}
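For context, a hedged round-trip sketch of the block format that writeBlock and readBlock share (demo scaffolding, not test code; assumes java.nio.ByteBuffer/IntBuffer and java.util.Arrays):

  int[] values = new int[128];                       // one full block
  for(int i=0;i<values.length;i++) values[i] = i % 17;
  byte[] encoded = new byte[128*4];
  IntBuffer encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
  int header = ForUtil.compress(values, encodedBuffer);
  // on disk this becomes: writeVInt(header) + ForUtil.getEncodedSize(header) bytes
  int[] decoded = new int[128];
  ForUtil.decompress(encodedBuffer, decoded, header);
  assert Arrays.equals(values, decoded);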
@ -219,61 +222,25 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (DEBUG) {
System.out.println("FPW.startDoc docID=" + docID);
System.out.println("FPW.startDoc docID["+docBufferUpto+"]=" + docID);
}
// nocommit do this in finishDoc... but does it fail...?
// is it not always called...?
if (posOut != null && saveNextPosBlock) {
lastBlockPosFP = posOut.getFilePointer();
if (payOut != null) {
lastBlockPayFP = payOut.getFilePointer();
}
lastBlockPosBufferUpto = posBufferUpto;
lastBlockStartOffset = lastStartOffset;
lastBlockPayloadByteUpto = payloadByteUpto;
saveNextPosBlock = false;
if (DEBUG) {
System.out.println(" now save lastBlockPosFP=" + lastBlockPosFP + " lastBlockPosBufferUpto=" + lastBlockPosBufferUpto + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUpto);
}
}
final int docDelta = docID - lastDocID;
if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
}
lastDocID = docID;
docDeltaBuffer[docBufferUpto] = docDelta;
if (DEBUG) {
System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta);
}
// if (DEBUG) {
// System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta);
// }
if (fieldHasFreqs) {
freqBuffer[docBufferUpto] = termDocFreq;
}
docBufferUpto++;
docCount++;
if (docBufferUpto == blockSize) {
// nocommit maybe instead of buffering skip before
// writing a block based on last block's end data
// ... we could buffer after writing the block? only
// iffiness with that approach is it could be a
// pointlness skip? like we may stop adding docs
// right after that, then we have skip point AFTER
// last doc. the thing is, in finishTerm we are
// already sometimes adding a skip point AFTER the
// last doc?
if (lastBlockDocID != -1) {
if (DEBUG) {
System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-blockSize));
}
skipWriter.bufferSkip(lastBlockDocID, docCount-blockSize, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
}
lastBlockDocID = docID;
saveNextPosBlock = true;
if (DEBUG) {
System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer());
}
@ -284,9 +251,11 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
}
writeBlock(freqBuffer, docOut);
}
docBufferUpto = 0;
// NOTE: don't set docBufferUpto back to 0 here;
// finishDoc will do so (because it needs to see that
// the block was filled so it can save skip data)
}
lastDocID = docID;
lastPosition = 0;
lastStartOffset = 0;
}
@ -294,9 +263,9 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
if (DEBUG) {
System.out.println("FPW.addPosition pos=" + position + " posBufferUpto=" + posBufferUpto + (fieldHasPayloads ? " payloadByteUpto=" + payloadByteUpto: ""));
}
// if (DEBUG) {
// System.out.println("FPW.addPosition pos=" + position + " posBufferUpto=" + posBufferUpto + (fieldHasPayloads ? " payloadByteUpto=" + payloadByteUpto: ""));
// }
posDeltaBuffer[posBufferUpto] = position - lastPosition;
if (fieldHasPayloads) {
if (payload == null || payload.length == 0) {
@ -343,7 +312,39 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
}
@Override
public void finishDoc() {
public void finishDoc() throws IOException {
// We have collected a block of docs and a new doc has arrived;
// we should write the skip data, as well as the postings list, for
// the completed block
if (lastBlockDocID != -1 && docBufferUpto == 1) {
// nocommit move to startDoc? ie we can write skip
// data as soon as the next doc starts...
if (DEBUG) {
System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
}
skipWriter.bufferSkip(lastBlockDocID, docCount-1, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
}
// Since we don't know the df for the current term, we have to buffer
// the skip data for each block, and write it to the skip file once
// a new doc arrives.
if (docBufferUpto == blockSize) {
lastBlockDocID = lastDocID;
if (posOut != null) {
if (payOut != null) {
lastBlockPayFP = payOut.getFilePointer();
}
lastBlockPosFP = posOut.getFilePointer();
lastBlockPosBufferUpto = posBufferUpto;
lastBlockStartOffset = lastStartOffset;
lastBlockPayloadByteUpto = payloadByteUpto;
}
if (DEBUG) {
System.out.println(" docBufferUpto="+docBufferUpto+" now get lastBlockDocID="+lastBlockDocID+" lastBlockPosFP=" + lastBlockPosFP + " lastBlockPosBufferUpto=" + lastBlockPosBufferUpto + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUpto);
}
docBufferUpto = 0;
}
}
private static class PendingTerm {
@ -367,7 +368,6 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
@ -378,19 +378,6 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
System.out.println("FPW.finishTerm docFreq=" + stats.docFreq);
}
// nocommit silly that skipper must write skip when we no
// postings come after it, but if we don't do this, skip
// reader incorrectly thinks it can read another level 0
// skip entry here!:
//if (docCount > blockSize && docBufferUpto > 0) {
if (docCount > blockSize) {
final int lastDocCount = blockSize*(docCount/blockSize);
if (DEBUG) {
System.out.println(" bufferSkip at finishTerm: lastDocID=" + lastBlockDocID + " docCount=" + lastDocCount);
}
skipWriter.bufferSkip(lastBlockDocID, lastDocCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
}
if (DEBUG) {
if (docBufferUpto > 0) {
System.out.println(" write doc/freq vInt block (count=" + docBufferUpto + ") at fp=" + docOut.getFilePointer() + " docTermStartFP=" + docTermStartFP);

View File

@ -24,14 +24,35 @@ import org.apache.lucene.codecs.MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput;
/**
* Implements the skip list reader for the 4.0 posting list format
* Implements the skip list reader for the block postings format
* that stores positions and payloads.
*
* @see Lucene40PostingsFormat
* @lucene.experimental
* Although this skipper uses MultiLevelSkipListReader as an interface,
* its definition of a skip position is a little different.
*
* For example, when skipInterval = blockSize = 3 and df = 2*skipInterval = 6:
*
* 0 1 2 3 4 5
* d d d d d d    (posting list)
*     ^     ^    (skip point in MultiLevelSkipListWriter)
*     ^          (skip point in BlockSkipWriter)
*
* In this case, MultiLevelSkipListReader will use the last document as a skip point,
* while BlockSkipReader should assume that no skip point comes there.
*
* If we used the interface directly in BlockSkipReader, it might foolishly try to read
* more skip data after the only skip point has been loaded.
*
* To illustrate: if we call skipTo(d[5]), then since skip point d[2] has a smaller docId
* and numSkipped+blockSize == df, MultiLevelSkipListReader will assume the skip list
* isn't exhausted yet and try to load a non-existent skip point.
*
* Therefore, we trim df before passing it to the interface; see trim(int).
*
*/
final class BlockSkipReader extends MultiLevelSkipListReader {
private boolean DEBUG = BlockPostingsReader.DEBUG;
private int blockSize;
private long docPointer[];
private long posPointer[];
@ -47,8 +68,9 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
private long lastDocPointer;
private int lastPosBufferUpto;
public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
super(skipStream, maxSkipLevels, skipInterval);
public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
super(skipStream, maxSkipLevels, blockSize);
this.blockSize = blockSize;
docPointer = new long[maxSkipLevels];
if (hasPos) {
posPointer = new long[maxSkipLevels];
@ -73,8 +95,22 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
}
}
/**
* Trim the original docFreq to tell the skip reader to read the proper number of skip points.
*
* Since our definition in BlockSkip* is a little different from MultiLevelSkip*,
* this trimmed docFreq prevents the skip reader from:
* 1. foolishly reading a non-existent skip point after the last block boundary
* 2. moving into the vInt block
*
*/
protected int trim(int df) {
return df % blockSize == 0? df - 1: df;
}
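A quick worked check of trim, using the blockSize = 3, df = 6 example from the class javadoc (illustration only):

  // df on a block boundary is dropped by one, so the phantom level-0 entry
  // after the last block is never read; other values pass through unchanged
  assert trim(6) == 5;
  assert trim(5) == 5;
  assert trim(7) == 7;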
public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) {
super.init(skipPointer, df);
super.init(skipPointer, trim(df));
lastDocPointer = docBasePointer;
lastPosPointer = posBasePointer;
lastPayPointer = payBasePointer;

View File

@ -23,10 +23,29 @@ import java.util.Arrays;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
// nocommit do we need more frequent skips at level > 0?
// 128*128 is immense? may need to decouple
// nocommit may need to decouple
// baseSkipInterval & theRestSkipInterval?
/**
* Writes skip lists with multiple levels, and supports skipping within a block of packed ints.
*
* Assume that docFreq = 28 and skipInterval = blockSize = 12:
*
* |       block#0       | |       block#1       | |vInts|
* d d d d d d d d d d d d d d d d d d d d d d d d d d d d    (posting list)
*                       ^                       ^             (level 0 skip point)
*
* Note that the skipWriter will ignore the first document in block#0, since
* it is useless as a skip point. Also, we'll never skip into the vInt
* block; we only record skip data at its start point (if it exists).
*
* For each skip point, we will record:
* 1. lastDocID,
* 2. its related file pointers (position, payload),
* 3. related numbers or uptos (position, payload),
* 4. start offset.
*
*/
final class BlockSkipWriter extends MultiLevelSkipListWriter {
private boolean DEBUG = BlockPostingsReader.DEBUG;
@ -52,8 +71,8 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
public BlockSkipWriter(int skipInterval, int maxSkipLevels, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
super(skipInterval, maxSkipLevels, docCount);
public BlockSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
super(blockSize, maxSkipLevels, docCount);
this.docOut = docOut;
this.posOut = posOut;
this.payOut = payOut;
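To make the class javadoc's diagram concrete, a small sketch that computes the level-0 skip points for docFreq = 28, blockSize = 12 (plain arithmetic under the javadoc's description, not writer code):

  int blockSize = 12, docFreq = 28;
  int fullBlocks = docFreq / blockSize;  // 2 packed blocks; docs 24..27 fall into the vInt tail
  for(int b=0;b<fullBlocks;b++) {
    // a level-0 skip point records the last doc of each filled packed block
    System.out.println("skip point at posting #" + (b*blockSize + blockSize - 1));
  }
  // prints 11 and 23 -- the two carets in the diagram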

View File

@ -22,7 +22,7 @@ import java.nio.IntBuffer;
* Encode all values in normal area with fixed bit width,
* which is determined by the max value in this block.
*/
public class ForUtil {
public final class ForUtil {
protected static final int[] MASK = { 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
@ -31,24 +31,24 @@ public class ForUtil {
0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
0x7fffffff, 0xffffffff};
final static int blockSize = BlockPostingsFormat.BLOCK_SIZE;
/** Compress given int[] into Integer buffer, with For format
*
* @param data uncompressed data
* @param size num of ints to compress
* @param intBuffer integer buffer to hold compressed data
* @return encoded block byte size
* @return the header for the current block
*/
public static int compress(final int[] data, IntBuffer intBuffer) {
int numBits=getNumBits(data);
int numBits = getNumBits(data);
if (numBits == 0) {
return compressDuplicateBlock(data,intBuffer);
return compressDuplicateBlock(data, intBuffer);
}
int size=data.length;
int encodedSize = (size*numBits+31)/32;
int encodedSize = (blockSize*numBits+31)/32;
for (int i=0; i<size; ++i) {
encodeNormalValue(intBuffer,i,data[i], numBits);
for (int i=0; i<blockSize; ++i) {
encodeNormalValue(intBuffer, i, data[i], numBits);
}
return getHeader(encodedSize, numBits);
@ -58,7 +58,7 @@ public class ForUtil {
* Save only one int when all values in the block are equal
*/
static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
intBuffer.put(0,data[0]);
intBuffer.put(0, data[0]);
return getHeader(1, 0);
}
@ -66,6 +66,7 @@ public class ForUtil {
*
* @param intBuffer integer buffer to hold compressed data
* @param data int array to hold uncompressed data
* @param header header of the current block, which contains numFrameBits
*/
public static void decompress(IntBuffer intBuffer, int[] data, int header) {
// since this buffer is reused at upper level, rewind first
@ -73,22 +74,12 @@ public class ForUtil {
// nocommit assert header isn't "malformed", ie besides
// numBytes / bit-width there is nothing else!
int numBits = ((header >> 8) & MASK[6]);
decompressCore(intBuffer, data, numBits);
}
/**
* IntBuffer will not be rewinded in this method, therefore
* caller should ensure that the position is set to the first
* encoded int before decoding.
*/
static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
assert numBits<=32;
assert numBits>=0;
// TODO: PackedIntsDecompress is hardwired to size==128 only
switch(numBits) {
case 0: PackedIntsDecompress.decode0(intBuffer, data); break;
case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
@ -163,6 +154,7 @@ public class ForUtil {
return optBits;
}
// nocommit: we must have a util function for this, hmm?
protected static boolean isAllEqual(final int[] data) {
int len = data.length;
int v = data[0];
@ -177,23 +169,21 @@ public class ForUtil {
/**
* Generate the 4 byte header, which contains (from lsb to msb):
*
* 8 bits for encoded block int size (excluded header, this limits DEFAULT_BLOCK_SIZE <= 2^8)
* 6 bits for num of frame bits (when 0, values in this block are all the same)
* other bits unused
* other bits for the encoded block int size (header excluded), so we can use crazy block sizes
*
*/
static int getHeader(int encodedSize, int numBits) {
return (encodedSize)
| ((numBits) << 8);
return numBits | (encodedSize << 6);
}
/**
* Expert: get metadata from header.
*/
public static int getEncodedSize(int header) {
return ((header & MASK[8]))*4;
static int getNumBits(int header) {
return ((header & MASK[6]));
}
public static int getNumBits(int header) {
return ((header >> 8) & MASK[6]);
static int getEncodedSize(int header) {
return ((header >>> 6))*4;
}
}
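To make the new header layout concrete, a standalone demo (HeaderDemo is my own name, not part of the patch):

  // low 6 bits = numFrameBits; remaining bits = encoded size in ints, so the
  // block size is no longer capped by an 8-bit size field
  public class HeaderDemo {
    static int getHeader(int encodedSize, int numBits) {
      return numBits | (encodedSize << 6);
    }
    public static void main(String[] args) {
      // a 128-int block at 7 bits/value packs into (128*7+31)/32 = 28 ints
      int header = getHeader(28, 7);
      System.out.println("numBits=" + (header & 0x3f));     // 7
      System.out.println("bytes=" + ((header >>> 6) * 4));  // 112
      // all-equal block: a single int is stored and numBits == 0
      int dup = getHeader(1, 0);
      System.out.println((dup & 0x3f) + " " + (dup >>> 6)); // prints: 0 1
    }
  }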