mirror of https://github.com/apache/lucene.git
fork postingsformat: TempBlock
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1493376 13f79535-47bb-0310-9956-ffa450edef68
parent 16fe4335b2
commit ee49f10cf0

@@ -0,0 +1,77 @@
package org.apache.lucene.codecs;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Closeable;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.codecs.temp.TempTermState;

/** The core terms dictionaries (BlockTermsReader,
 *  BlockTreeTermsReader) interact with a single instance
 *  of this class to manage creation of {@link DocsEnum} and
 *  {@link DocsAndPositionsEnum} instances.  It provides an
 *  IndexInput (termsIn) where this class may read any
 *  previously stored data that it had written in its
 *  corresponding {@link PostingsWriterBase} at indexing
 *  time.
 *  @lucene.experimental */

// TODO: find a better name; this defines the API that the
// terms dict impls use to talk to a postings impl.
// TermsDict + PostingsReader/WriterBase == PostingsConsumer/Producer
public abstract class TempPostingsReaderBase implements Closeable {

  /** Sole constructor.  (For invocation by subclass
   *  constructors, typically implicit.) */
  protected TempPostingsReaderBase() {
  }

  /** Performs any initialization, such as reading and
   *  verifying the header from the provided terms
   *  dictionary {@link IndexInput}. */
  public abstract void init(IndexInput termsIn) throws IOException;

  /** Return a newly created empty TermState */
  public abstract TempTermState newTermState() throws IOException;

  /** Actually decode metadata for next term */
  public abstract void nextTerm(FieldInfo fieldInfo, TempTermState state) throws IOException;

  /** Must fully consume state, since after this call that
   *  TermState may be reused. */
  public abstract DocsEnum docs(FieldInfo fieldInfo, TempTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException;

  /** Must fully consume state, since after this call that
   *  TermState may be reused. */
  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TempTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
                                                        int flags) throws IOException;

  @Override
  public abstract void close() throws IOException;

  /** Reads data for all terms in the next block; this
   *  method should merely load the byte[] blob but not
   *  decode, which is done in {@link #nextTerm}. */
  public abstract void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, TempTermState termState) throws IOException;
}
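For orientation, here is the call sequence a terms dictionary would drive against this reader API. This is an illustrative sketch, not code from this commit: the helper method and loop shape are hypothetical, and only the abstract methods declared above are assumed.

    // Sketch: one block's worth of terms. readTermsBlock() loads the raw
    // metadata blob once; nextTerm() then decodes one term at a time, and the
    // TermState may be reused after each docs() call, as documented above.
    void consumeBlock(TempPostingsReaderBase reader, IndexInput termsIn,
                      FieldInfo fieldInfo, int termCount) throws IOException {
      TempTermState state = reader.newTermState();
      reader.readTermsBlock(termsIn, fieldInfo, state);    // load, don't decode
      for (int i = 0; i < termCount; i++) {
        reader.nextTerm(fieldInfo, state);                 // decode next term's metadata
        DocsEnum docs = reader.docs(fieldInfo, state, null, null, DocsEnum.FLAG_FREQS);
        // ... iterate docs; 'state' is fully consumed and may be reused
      }
    }
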
@@ -0,0 +1,73 @@
package org.apache.lucene.codecs;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Closeable;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.index.FieldInfo;

/**
 * Extension of {@link PostingsConsumer} to support pluggable term dictionaries.
 * <p>
 * This class contains additional hooks to interact with the provided
 * term dictionaries such as {@link BlockTreeTermsWriter}. If you want
 * to re-use an existing implementation and are only interested in
 * customizing the format of the postings list, extend this class
 * instead.
 *
 * @see PostingsReaderBase
 * @lucene.experimental
 */
// TODO: find a better name; this defines the API that the
// terms dict impls use to talk to a postings impl.
// TermsDict + PostingsReader/WriterBase == PostingsConsumer/Producer
public abstract class TempPostingsWriterBase extends PostingsConsumer implements Closeable {

  /** Sole constructor.  (For invocation by subclass
   *  constructors, typically implicit.) */
  protected TempPostingsWriterBase() {
  }

  /** Called once after startup, before any terms have been
   *  added.  Implementations typically write a header to
   *  the provided {@code termsOut}. */
  public abstract void start(IndexOutput termsOut) throws IOException;

  /** Start a new term.  Note that a matching call to {@link
   *  #finishTerm(TermStats)} is done only if the term has at least one
   *  document. */
  public abstract void startTerm() throws IOException;

  /** Flush count terms starting at start "backwards", as a
   *  block.  start is a negative offset from the end of the
   *  terms stack, i.e. bigger start means further back in
   *  the stack. */
  public abstract void flushTermsBlock(int start, int count) throws IOException;

  /** Finishes the current term.  The provided {@link
   *  TermStats} contains the term's summary statistics. */
  public abstract void finishTerm(TermStats stats) throws IOException;

  /** Called when the writing switches to another field. */
  public abstract void setField(FieldInfo fieldInfo);

  @Override
  public abstract void close() throws IOException;
}
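The write side imposes a fixed call order on implementations. A minimal sketch of a hypothetical driver follows (not part of this commit; the TermStats values and flushTermsBlock arguments are made up for illustration):

    // Sketch: the lifecycle a terms dictionary imposes on a TempPostingsWriterBase.
    void writeOneField(TempPostingsWriterBase writer, IndexOutput termsOut,
                       FieldInfo fieldInfo) throws IOException {
      writer.start(termsOut);              // once: write the header
      writer.setField(fieldInfo);          // per field: freq/pos/offset/payload flags
      writer.startTerm();                  // per term
      // ... postings added via the inherited PostingsConsumer methods ...
      writer.finishTerm(new TermStats(1, 1L));  // only for terms with >= 1 document
      writer.flushTermsBlock(1, 1);        // flush buffered term metadata as a block
      writer.close();
    }
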
@@ -33,7 +33,7 @@ import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
  * Encode all values in normal area with fixed bit width,
  * which is determined by the max value in this block.
  */
-final class ForUtil {
+public final class ForUtil {
 
   /**
    * Special number of bits per value used whenever all values to encode are equal.
@@ -44,7 +44,7 @@ final class ForUtil {
    * Upper limit of the number of bytes that might be required to store
    * <code>BLOCK_SIZE</code> encoded values.
    */
-  static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
+  public static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
 
   /**
    * Upper limit of the number of values that might be decoded in a single call to
@@ -52,7 +52,7 @@ final class ForUtil {
    * <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
    * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
    */
-  static final int MAX_DATA_SIZE;
+  public static final int MAX_DATA_SIZE;
   static {
     int maxDataSize = 0;
     for(int version=PackedInts.VERSION_START;version<=PackedInts.VERSION_CURRENT;version++) {
@@ -96,7 +96,7 @@ final class ForUtil {
   /**
    * Create a new {@link ForUtil} instance and save state into <code>out</code>.
    */
-  ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
+  public ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
     out.writeVInt(PackedInts.VERSION_CURRENT);
     encodedSizes = new int[33];
     encoders = new PackedInts.Encoder[33];
@@ -122,7 +122,7 @@ final class ForUtil {
   /**
    * Restore a {@link ForUtil} from a {@link DataInput}.
    */
-  ForUtil(DataInput in) throws IOException {
+  public ForUtil(DataInput in) throws IOException {
     int packedIntsVersion = in.readVInt();
     PackedInts.checkVersion(packedIntsVersion);
     encodedSizes = new int[33];
@@ -154,7 +154,7 @@ final class ForUtil {
    * @param out the destination output
    * @throws IOException If there is a low-level I/O error
    */
-  void writeBlock(int[] data, byte[] encoded, IndexOutput out) throws IOException {
+  public void writeBlock(int[] data, byte[] encoded, IndexOutput out) throws IOException {
     if (isAllEqual(data)) {
       out.writeByte((byte) ALL_VALUES_EQUAL);
       out.writeVInt(data[0]);
@@ -183,7 +183,7 @@ final class ForUtil {
    * @param decoded where to write decoded data
    * @throws IOException If there is a low-level I/O error
    */
-  void readBlock(IndexInput in, byte[] encoded, int[] decoded) throws IOException {
+  public void readBlock(IndexInput in, byte[] encoded, int[] decoded) throws IOException {
     final int numBits = in.readByte();
     assert numBits <= 32 : numBits;
 
@@ -209,7 +209,7 @@ final class ForUtil {
    * @param in the input where to read data
    * @throws IOException If there is a low-level I/O error
    */
-  void skipBlock(IndexInput in) throws IOException {
+  public void skipBlock(IndexInput in) throws IOException {
     final int numBits = in.readByte();
     if (numBits == ALL_VALUES_EQUAL) {
       in.readVInt();
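The ForUtil javadoc above says each packed block's bit width is determined by the block's maximum value. A worked sketch of that selection (illustrative only; of the API it uses, only PackedInts.bitsRequired is assumed):

    // Sketch: for 128 deltas whose max is 113, PackedInts.bitsRequired(113) == 7,
    // so the block packs 7 bits per value (~112 bytes) instead of 4 bytes per int
    // (512); MAX_ENCODED_SIZE = BLOCK_SIZE * 4 is exactly the 32-bit worst case.
    static int bitsPerValue(int[] block) {
      long max = 0;
      for (int v : block) {
        max = Math.max(max, v);
      }
      return PackedInts.bitsRequired(max);  // 1..32 for non-negative int deltas
    }
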
@@ -161,7 +161,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *    <li>SkipFPDelta determines the position of this term's SkipData within the .doc
  *        file. In particular, it is the length of the TermFreq data.
  *        SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
- *        (i.e. 128 in Lucene41PostingsFormat).</li>
+ *        (i.e. 8 in Lucene41PostingsFormat).</li>
  *    <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
  *        of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
  *        single document ID is written to the term dictionary.</li>
@@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput;
  * Therefore, we'll trim df before passing it to the interface. see trim(int)
  *
  */
-final class Lucene41SkipReader extends MultiLevelSkipListReader {
+public final class Lucene41SkipReader extends MultiLevelSkipListReader {
   // private boolean DEBUG = Lucene41PostingsReader.DEBUG;
   private final int blockSize;
 
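The trim mentioned in the comment above ("we'll trim df before passing it to the interface") matches the TrimmedDocFreq formula quoted later in this commit's format javadoc (TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq). The method body is not shown in this hunk; a plausible sketch, using the blockSize field declared above:

    // Sketch: when df is an exact multiple of the block size there is no skip
    // entry for the last block, so one posting is shaved off before handing df
    // to MultiLevelSkipListReader.
    protected int trim(int df) {
      return df % blockSize == 0 ? df - 1 : df;
    }
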
@@ -43,7 +43,7 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
  * 4. start offset.
  *
  */
-final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
+public final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
   // private boolean DEBUG = Lucene41PostingsReader.DEBUG;
 
   private int[] lastSkipDoc;
@@ -381,7 +381,13 @@ the term dictionary. Stored fields are compressed by default. </li>
 <a name="Limitations" id="Limitations"></a>
 <h2>Limitations</h2>
 <div>
-<p>Lucene uses a Java <code>int</code> to refer to
+<p>When referring to term numbers, Lucene's current implementation uses a Java
+<code>int</code> to hold the term index, which means the
+maximum number of unique terms in any single index segment is ~2.1 billion
+times the term index interval (default 128) = ~274 billion. This is technically
+not a limitation of the index file format, just of Lucene's current
+implementation.</p>
+<p>Similarly, Lucene uses a Java <code>int</code> to refer to
 document numbers, and the index file format uses an <code>Int32</code>
 on-disk to store document numbers. This is a limitation
 of both the index file format and the current implementation. Eventually these
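The "~274 billion" figure in the added paragraph is just Integer.MAX_VALUE times the default term index interval; a quick check:

    // Sketch of the arithmetic only (no Lucene API involved):
    static long maxUniqueTerms() {
      // ~2.1 billion term indexes per segment, times the default interval of 128:
      return (long) Integer.MAX_VALUE * 128;  // == 274,877,906,816, the "~274 billion" above
    }
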
@@ -0,0 +1,47 @@
package org.apache.lucene.codecs.temp;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.TermState;

public class TempBlockTermState extends TempTermState {
  /** the term's ord in the current block */
  public int termBlockOrd;

  /** Sole constructor.  (For invocation by subclass
   *  constructors, typically implicit.) */
  protected TempBlockTermState() {
  }

  @Override
  public TempBlockTermState clone() {
    TempBlockTermState other = (TempBlockTermState) super.clone();
    return other;
  }

  @Override
  public void copyFrom(TermState _other) {
    assert _other instanceof TempBlockTermState : "can not copy from " + _other.getClass().getName();
    super.copyFrom(_other);
    TempBlockTermState other = (TempBlockTermState) _other;
    termBlockOrd = other.termBlockOrd;
  }

  @Override
  public String toString() {
    return super.toString() + " termBlockOrd=" + termBlockOrd;
  }
}
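
The clone()/copyFrom() pair above exists so callers can snapshot and restore term states cheaply. A hypothetical usage sketch (the helper names are not from this commit):

    // Sketch: snapshot a state (including termBlockOrd), then restore it later.
    TempBlockTermState snapshot(TempBlockTermState live) {
      return live.clone();
    }

    void restore(TempBlockTermState live, TempBlockTermState saved) {
      live.copyFrom(saved);   // copies inherited fields plus termBlockOrd
    }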
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,450 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;

/**
 * Lucene 4.1 postings format, which encodes postings in packed integer blocks
 * for fast decode.
 *
 * <p><b>NOTE</b>: this format is still experimental and
 * subject to change without backwards compatibility.
 *
 * <p>
 * Basic idea:
 * <ul>
 *   <li>
 *   <b>Packed Blocks and VInt Blocks</b>:
 *   <p>In packed blocks, integers are encoded with the same bit width ({@link PackedInts packed format}):
 *      the block size (i.e. number of integers inside block) is fixed (currently 128). Additionally blocks
 *      that are all the same value are encoded in an optimized way.</p>
 *   <p>In VInt blocks, integers are encoded as {@link DataOutput#writeVInt VInt}:
 *      the block size is variable.</p>
 *   </li>
 *
 *   <li>
 *   <b>Block structure</b>:
 *   <p>When the postings are long enough, TempPostingsFormat will try to encode most integer data
 *      as a packed block.</p>
 *   <p>Take a term with 259 documents as an example: the first 256 document ids are encoded as two packed
 *      blocks, while the remaining 3 are encoded as one VInt block. </p>
 *   <p>Different kinds of data are always encoded separately into different packed blocks, but may
 *      possibly be interleaved into the same VInt block. </p>
 *   <p>This strategy is applied to pairs:
 *      <document number, frequency>,
 *      <position, payload length>,
 *      <position, offset start, offset length>, and
 *      <position, payload length, offset start, offset length>.</p>
 *   </li>
 *
 *   <li>
 *   <b>Skipdata settings</b>:
 *   <p>The structure of skip table is quite similar to previous version of Lucene. Skip interval is the
 *      same as block size, and each skip entry points to the beginning of each block. However, for
 *      the first block, skip data is omitted.</p>
 *   </li>
 *
 *   <li>
 *   <b>Positions, Payloads, and Offsets</b>:
 *   <p>A position is an integer indicating where the term occurs within one document.
 *      A payload is a blob of metadata associated with current position.
 *      An offset is a pair of integers indicating the tokenized start/end offsets for given term
 *      in current position: it is essentially a specialized payload. </p>
 *   <p>When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets (assuming a
 *      null payload contributes one count). As mentioned in block structure, it is possible to encode
 *      these three either combined or separately.
 *   <p>In all cases, payloads and offsets are stored together. When encoded as a packed block,
 *      position data is separated out as .pos, while payloads and offsets are encoded in .pay (payload
 *      metadata will also be stored directly in .pay). When encoded as VInt blocks, all these three are
 *      stored interleaved into the .pos (so is payload metadata).</p>
 *   <p>With this strategy, the majority of payload and offset data will be outside .pos file.
 *      So for queries that require only position data, running on a full index with payloads and offsets,
 *      this reduces disk pre-fetches.</p>
 *   </li>
 * </ul>
 * </p>
 *
 * <p>
 * Files and detailed format:
 * <ul>
 *   <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
 *   <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
 *   <li><tt>.doc</tt>: <a href="#Frequencies">Frequencies and Skip Data</a></li>
 *   <li><tt>.pos</tt>: <a href="#Positions">Positions</a></li>
 *   <li><tt>.pay</tt>: <a href="#Payloads">Payloads and Offsets</a></li>
 * </ul>
 * </p>
 *
 * <a name="Termdictionary" id="Termdictionary"></a>
 * <dl>
 * <dd>
 * <b>Term Dictionary</b>
 *
 * <p>The .tim file contains the list of terms in each
 * field along with per-term statistics (such as docfreq)
 * and pointers to the frequencies, positions, payload and
 * skip data in the .doc, .pos, and .pay files.
 * See {@link TempBlockTermsWriter} for more details on the format.
 * </p>
 *
 * <p>NOTE: The term dictionary can plug into different postings implementations:
 * the postings writer/reader are actually responsible for encoding
 * and decoding the Postings Metadata and Term Metadata sections described here:</p>
 *
 * <ul>
 *   <li>Postings Metadata --> Header, PackedBlockSize</li>
 *   <li>Term Metadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
 *       SkipFPDelta?</li>
 *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <li>PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}</li>
 *   <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}</li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
 *    <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
 *        for the postings.</li>
 *    <li>PackedBlockSize is the fixed block size for packed blocks. In packed block, bit width is
 *        determined by the largest integer. A smaller block size results in smaller variance among the
 *        widths of integers, hence smaller indexes. A larger block size results in more efficient bulk
 *        I/O, hence better acceleration. This value should always be a multiple of 64, currently fixed
 *        as 128 as a tradeoff. It is also the skip interval used to accelerate {@link DocsEnum#advance(int)}.
 *    <li>DocFPDelta determines the position of this term's TermFreqs within the .doc file.
 *        In particular, it is the difference of file offset between this term's
 *        data and previous term's data (or zero, for the first term in the block). On disk it is
 *        stored as the difference from previous value in sequence. </li>
 *    <li>PosFPDelta determines the position of this term's TermPositions within the .pos file,
 *        while PayFPDelta determines the position of this term's <TermPayloads, TermOffsets?> within
 *        the .pay file. Similar to DocFPDelta, it is the difference between two file positions (or
 *        neglected, for fields that omit payloads and offsets).</li>
 *    <li>PosVIntBlockFPDelta determines the position of this term's last TermPosition in last pos packed
 *        block within the .pos file. It is synonym for PayVIntBlockFPDelta or OffsetVIntBlockFPDelta.
 *        This is actually used to indicate whether it is necessary to load following
 *        payloads and offsets from .pos instead of .pay. Every time a new block of positions are to be
 *        loaded, the PostingsReader will use this value to check whether current block is packed format
 *        or VInt. When packed format, payloads and offsets are fetched from .pay, otherwise from .pos.
 *        (this value is neglected when the total number of positions, i.e. totalTermFreq, is less than
 *        or equal to PackedBlockSize).
 *    <li>SkipFPDelta determines the position of this term's SkipData within the .doc
 *        file. In particular, it is the length of the TermFreq data.
 *        SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
 *        (i.e. 8 in TempPostingsFormat).</li>
 *    <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
 *        of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
 *        single document ID is written to the term dictionary.</li>
 * </ul>
 * </dd>
 * </dl>
 *
 * <a name="Termindex" id="Termindex"></a>
 * <dl>
 * <dd>
 * <b>Term Index</b>
 * <p>The .tip file contains an index into the term dictionary, so that it can be
 * accessed randomly. See {@link TempBlockTermsWriter} for more details on the format.</p>
 * </dd>
 * </dl>
 *
 *
 * <a name="Frequencies" id="Frequencies"></a>
 * <dl>
 * <dd>
 * <b>Frequencies and Skip Data</b>
 *
 * <p>The .doc file contains the lists of documents which contain each term, along
 * with the frequency of the term in that document (except when frequencies are
 * omitted: {@link IndexOptions#DOCS_ONLY}). It also saves skip data to the beginning of
 * each packed or VInt block, when the length of document list is larger than packed block size.</p>
 *
 * <ul>
 *   <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup></li>
 *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <li>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>,
 *                     VIntBlock? </li>
 *   <li>PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?
 *   <li>VIntBlock --> <DocDelta[, Freq?]><sup>DocFreq-PackedBlockSize*PackedDocBlockNum</sup>
 *   <li>SkipData --> <<SkipLevelLength, SkipLevel>
 *       <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</li>
 *   <li>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
 *   <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
 *       PayFPSkip?>?, SkipChildLevelPointer?</li>
 *   <li>PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}</li>
 *   <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip
 *       -->
 *   {@link DataOutput#writeVInt VInt}</li>
 *   <li>SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}</li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
 *   <li>PackedDocDeltaBlock is theoretically generated from two steps:
 *     <ol>
 *       <li>Calculate the difference between each document number and previous one,
 *           and get a d-gaps list (for the first document, use absolute value); </li>
 *       <li>For those d-gaps from first one to PackedDocBlockNum*PackedBlockSize<sup>th</sup>,
 *           separately encode as packed blocks.</li>
 *     </ol>
 *     If frequencies are not omitted, PackedFreqBlock will be generated without d-gap step.
 *   </li>
 *   <li>VIntBlock stores remaining d-gaps (along with frequencies when possible) with a format
 *       that encodes DocDelta and Freq:
 *       <p>DocDelta: if frequencies are indexed, this determines both the document
 *       number and the frequency. In particular, DocDelta/2 is the difference between
 *       this document number and the previous document number (or zero when this is the
 *       first document in a TermFreqs). When DocDelta is odd, the frequency is one.
 *       When DocDelta is even, the frequency is read as another VInt. If frequencies
 *       are omitted, DocDelta contains the gap (not multiplied by 2) between document
 *       numbers and no frequency information is stored.</p>
 *       <p>For example, the TermFreqs for a term which occurs once in document seven
 *       and three times in document eleven, with frequencies indexed, would be the
 *       following sequence of VInts:</p>
 *       <p>15, 8, 3</p>
 *       <p>If frequencies were omitted ({@link IndexOptions#DOCS_ONLY}) it would be this
 *       sequence of VInts instead:</p>
 *       <p>7,4</p>
 *   </li>
 *   <li>PackedDocBlockNum is the number of packed blocks for current term's docids or frequencies.
 *       In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize) </li>
 *   <li>TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq.
 *       We use this trick since the definition of skip entry is a little different from base interface.
 *       In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for
 *       skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. However,
 *       in TempPostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
 *       2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case).
 *       When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one
 *       more skip data than TempSkipWriter. </li>
 *   <li>SkipDatum is the metadata of one skip entry.
 *      For the first block (no matter packed or VInt), it is omitted.</li>
 *   <li>DocSkip records the document number of every PackedBlockSize<sup>th</sup> document number in
 *       the postings (i.e. last document number in each packed block). On disk it is stored as the
 *       difference from previous value in the sequence. </li>
 *   <li>DocFPSkip records the file offsets of each block (excluding the first one), i.e. the posting at
 *       PackedBlockSize+1<sup>th</sup>, 2*PackedBlockSize+1<sup>th</sup> ... , in DocFile.
 *       The file offsets are relative to the start of current term's TermFreqs.
 *       On disk it is also stored as the difference from previous SkipDatum in the sequence.</li>
 *   <li>Since positions and payloads are also block encoded, the skip should skip to related block first,
 *       then fetch the values according to in-block offset. PosFPSkip and PayFPSkip record the file
 *       offsets of related block in .pos and .pay, respectively. While PosBlockOffset indicates
 *       which value to fetch inside the related block (PayBlockOffset is unnecessary since it is always
 *       equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
 *       current term's TermFreqs, and stored as a difference sequence.</li>
 *   <li>PayByteUpto indicates the start offset of the current payload. It is equivalent to
 *       the sum of the payload lengths in the current block up to PosBlockOffset</li>
 * </ul>
 * </dd>
 * </dl>
 *
 * <a name="Positions" id="Positions"></a>
 * <dl>
 * <dd>
 * <b>Positions</b>
 * <p>The .pos file contains the lists of positions that each term occurs at within documents. It also
 *    sometimes stores part of payloads and offsets for speedup.</p>
 * <ul>
 *   <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup></li>
 *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <li>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>,
 *                         VIntBlock? </li>
 *   <li>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?,
 *                     OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup>
 *   <li>PackedPosDeltaBlock --> {@link PackedInts PackedInts}</li>
 *   <li>PositionDelta, OffsetDelta, OffsetLength -->
 *   {@link DataOutput#writeVInt VInt}</li>
 *   <li>PayloadData --> {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
 *   <li>TermPositions are ordered by term (terms are implicit, from the term dictionary), and position
 *       values for each term document pair are incremental, and ordered by document number.</li>
 *   <li>PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets.
 *       In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize) </li>
 *   <li>PosVIntCount is the number of positions encoded as VInt format. In particular,
 *       PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize</li>
 *   <li>The procedure by which PackedPosDeltaBlock is generated is the same as for PackedDocDeltaBlock
 *       in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.</li>
 *   <li>PositionDelta is, if payloads are disabled for the term's field, the
 *       difference between the position of the current occurrence in the document and
 *       the previous occurrence (or zero, if this is the first occurrence in this
 *       document). If payloads are enabled for the term's field, then PositionDelta/2
 *       is the difference between the current and the previous position. If payloads
 *       are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
 *       the length of the payload at the current term position.</li>
 *   <li>For example, the TermPositions for a term which occurs as the fourth term in
 *       one document, and as the fifth and ninth term in a subsequent document, would
 *       be the following sequence of VInts (payloads disabled):
 *       <p>4, 5, 4</p></li>
 *   <li>PayloadData is metadata associated with the current term position. If
 *       PayloadLength is stored at the current position, then it indicates the length
 *       of this payload. If PayloadLength is not stored, then this payload has the same
 *       length as the payload at the previous position.</li>
 *   <li>OffsetDelta/2 is the difference between this position's startOffset from the
 *       previous occurrence (or zero, if this is the first occurrence in this document).
 *       If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the
 *       previous occurrence and an OffsetLength follows. Offset data is only written for
 *       {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.</li>
 * </ul>
 * </dd>
 * </dl>
 *
 * <a name="Payloads" id="Payloads"></a>
 * <dl>
 * <dd>
 * <b>Payloads and Offsets</b>
 * <p>The .pay file will store payloads and offsets associated with certain term-document positions.
 *    Some payloads and offsets will be separated out into .pos file, for performance reasons.</p>
 * <ul>
 *   <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup></li>
 *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <li>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup>
 *   <li>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup>
 *   <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}</li>
 *   <li>SumPayLength --> {@link DataOutput#writeVInt VInt}</li>
 *   <li>PayData --> {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
 *   <li>The order of TermPayloads/TermOffsets will be the same as TermPositions, note that part of
 *       payload/offsets are stored in .pos.</li>
 *   <li>The procedure by which PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the
 *       same as for PackedFreqBlock in chapter <a href="#Frequencies">Frequencies and Skip Data</a>,
 *       while PackedOffsetStartDeltaBlock follows the same procedure as PackedDocDeltaBlock.</li>
 *   <li>PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also synonym
 *       for PackedOffsetBlockNum.</li>
 *   <li>SumPayLength is the total length of payloads written within one block, and should be the sum
 *       of PayLengths in one packed block.</li>
 *   <li>PayLength in PackedPayLengthBlock is the length of each payload associated with the current
 *       position.</li>
 * </ul>
 * </dd>
 * </dl>
 * </p>
 *
 * @lucene.experimental
 */

public final class TempPostingsFormat extends PostingsFormat {
  /**
   * Filename extension for document number, frequencies, and skip data.
   * See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
   */
  public static final String DOC_EXTENSION = "doc";

  /**
   * Filename extension for positions.
   * See chapter: <a href="#Positions">Positions</a>
   */
  public static final String POS_EXTENSION = "pos";

  /**
   * Filename extension for payloads and offsets.
   * See chapter: <a href="#Payloads">Payloads and Offsets</a>
   */
  public static final String PAY_EXTENSION = "pay";

  private final int minTermBlockSize;
  private final int maxTermBlockSize;

  /**
   * Fixed packed block size, number of integers encoded in
   * a single packed block.
   */
  // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
  public final static int BLOCK_SIZE = 128;

  /** Creates {@code TempPostingsFormat} with default
   *  settings. */
  public TempPostingsFormat() {
    this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
  }

  /** Creates {@code TempPostingsFormat} with custom
   *  values for {@code minBlockSize} and {@code
   *  maxBlockSize} passed to block terms dictionary.
   *  @see TempBlockTermsWriter#TempBlockTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
  public TempPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
    super("TempBlock");
    this.minTermBlockSize = minTermBlockSize;
    assert minTermBlockSize > 1;
    this.maxTermBlockSize = maxTermBlockSize;
    assert minTermBlockSize <= maxTermBlockSize;
  }

  @Override
  public String toString() {
    return getName() + "(blocksize=" + BLOCK_SIZE + ")";
  }

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    TempPostingsWriterBase postingsWriter = new TempPostingsWriter(state);

    boolean success = false;
    try {
      FieldsConsumer ret = new TempBlockTermsWriter(state,
                                                    postingsWriter,
                                                    minTermBlockSize,
                                                    maxTermBlockSize);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(postingsWriter);
      }
    }
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    TempPostingsReaderBase postingsReader = new TempPostingsReader(state.directory,
                                                                   state.fieldInfos,
                                                                   state.segmentInfo,
                                                                   state.context,
                                                                   state.segmentSuffix);
    boolean success = false;
    try {
      FieldsProducer ret = new TempBlockTermsReader(state.directory,
                                                    state.fieldInfos,
                                                    state.segmentInfo,
                                                    postingsReader,
                                                    state.context,
                                                    state.segmentSuffix,
                                                    state.termsIndexDivisor);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(postingsReader);
      }
    }
  }
}
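The DocDelta and PositionDelta schemes in the javadoc above can be sanity-checked in a few lines. This is an illustration of the documented VInt encoding, not code from this commit; the helper name is hypothetical:

    // Doc 7 with freq 1, then doc 11 with freq 3 (freqs indexed) -> 15, 8, 3:
    //   doc 7:  delta 7, freq == 1, so emit 7*2+1 = 15
    //   doc 11: delta 4, freq != 1, so emit 4*2 = 8, then the freq, 3
    // Positions work the same way per document: position 4; then 5 and 9 in the
    // next doc (payloads disabled) -> deltas 4, 5, 4.
    static int[] encodeDocAndFreq(int docDelta, int freq) {
      return freq == 1 ? new int[] { (docDelta << 1) | 1 }
                       : new int[] { docDelta << 1, freq };
    }
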
File diff suppressed because it is too large
@ -0,0 +1,586 @@
|
||||||
|
package org.apache.lucene.codecs.temp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
|
||||||
|
import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE;
|
||||||
|
import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsWriterBase;
|
||||||
|
import org.apache.lucene.codecs.TermStats;
|
||||||
|
import org.apache.lucene.codecs.lucene41.Lucene41SkipWriter;
|
||||||
|
import org.apache.lucene.codecs.lucene41.ForUtil;
|
||||||
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Concrete class that writes docId(maybe frq,pos,offset,payloads) list
|
||||||
|
* with postings format.
|
||||||
|
*
|
||||||
|
* Postings list for each term will be stored separately.
|
||||||
|
*
|
||||||
|
* @see Lucene41SkipWriter for details about skipping setting and postings layout.
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public final class TempPostingsWriter extends TempPostingsWriterBase {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: The maximum number of skip levels. Smaller values result in
|
||||||
|
* slightly smaller indexes, but slower skipping in big posting lists.
|
||||||
|
*/
|
||||||
|
static final int maxSkipLevels = 10;
|
||||||
|
|
||||||
|
final static String TERMS_CODEC = "TempPostingsWriterTerms";
|
||||||
|
final static String DOC_CODEC = "TempPostingsWriterDoc";
|
||||||
|
final static String POS_CODEC = "TempPostingsWriterPos";
|
||||||
|
final static String PAY_CODEC = "TempPostingsWriterPay";
|
||||||
|
|
||||||
|
// Increment version to change it
|
||||||
|
final static int VERSION_START = 0;
|
||||||
|
final static int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
|
final IndexOutput docOut;
|
||||||
|
final IndexOutput posOut;
|
||||||
|
final IndexOutput payOut;
|
||||||
|
|
||||||
|
private IndexOutput termsOut;
|
||||||
|
|
||||||
|
// How current field indexes postings:
|
||||||
|
private boolean fieldHasFreqs;
|
||||||
|
private boolean fieldHasPositions;
|
||||||
|
private boolean fieldHasOffsets;
|
||||||
|
private boolean fieldHasPayloads;
|
||||||
|
|
||||||
|
// Holds starting file pointers for each term:
|
||||||
|
private long docTermStartFP;
|
||||||
|
private long posTermStartFP;
|
||||||
|
private long payTermStartFP;
|
||||||
|
|
||||||
|
final int[] docDeltaBuffer;
|
||||||
|
final int[] freqBuffer;
|
||||||
|
private int docBufferUpto;
|
||||||
|
|
||||||
|
final int[] posDeltaBuffer;
|
||||||
|
final int[] payloadLengthBuffer;
|
||||||
|
final int[] offsetStartDeltaBuffer;
|
||||||
|
final int[] offsetLengthBuffer;
|
||||||
|
private int posBufferUpto;
|
||||||
|
|
||||||
|
private byte[] payloadBytes;
|
||||||
|
private int payloadByteUpto;
|
||||||
|
|
||||||
|
private int lastBlockDocID;
|
||||||
|
private long lastBlockPosFP;
|
||||||
|
private long lastBlockPayFP;
|
||||||
|
private int lastBlockPosBufferUpto;
|
||||||
|
private int lastBlockPayloadByteUpto;
|
||||||
|
|
||||||
|
private int lastDocID;
|
||||||
|
private int lastPosition;
|
||||||
|
private int lastStartOffset;
|
||||||
|
private int docCount;
|
||||||
|
|
||||||
|
final byte[] encoded;
|
||||||
|
|
||||||
|
private final ForUtil forUtil;
|
||||||
|
private final Lucene41SkipWriter skipWriter;
|
||||||
|
|
||||||
|
/** Creates a postings writer with the specified PackedInts overhead ratio */
|
||||||
|
// TODO: does this ctor even make sense?
|
||||||
|
public TempPostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
|
||||||
|
super();
|
||||||
|
|
||||||
|
docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempPostingsFormat.DOC_EXTENSION),
|
||||||
|
state.context);
|
||||||
|
IndexOutput posOut = null;
|
||||||
|
IndexOutput payOut = null;
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
|
||||||
|
forUtil = new ForUtil(acceptableOverheadRatio, docOut);
|
||||||
|
if (state.fieldInfos.hasProx()) {
|
||||||
|
posDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||||
|
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempPostingsFormat.POS_EXTENSION),
|
||||||
|
state.context);
|
||||||
|
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
|
||||||
|
|
||||||
|
if (state.fieldInfos.hasPayloads()) {
|
||||||
|
payloadBytes = new byte[128];
|
||||||
|
payloadLengthBuffer = new int[MAX_DATA_SIZE];
|
||||||
|
} else {
|
||||||
|
payloadBytes = null;
|
||||||
|
payloadLengthBuffer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state.fieldInfos.hasOffsets()) {
|
||||||
|
offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||||
|
offsetLengthBuffer = new int[MAX_DATA_SIZE];
|
||||||
|
} else {
|
||||||
|
offsetStartDeltaBuffer = null;
|
||||||
|
offsetLengthBuffer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
|
||||||
|
payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempPostingsFormat.PAY_EXTENSION),
|
||||||
|
state.context);
|
||||||
|
CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
posDeltaBuffer = null;
|
||||||
|
payloadLengthBuffer = null;
|
||||||
|
offsetStartDeltaBuffer = null;
|
||||||
|
offsetLengthBuffer = null;
|
||||||
|
payloadBytes = null;
|
||||||
|
}
|
||||||
|
this.payOut = payOut;
|
||||||
|
this.posOut = posOut;
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
docDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||||
|
freqBuffer = new int[MAX_DATA_SIZE];
|
||||||
|
|
||||||
|
// TODO: should we try skipping every 2/4 blocks...?
|
||||||
|
skipWriter = new Lucene41SkipWriter(maxSkipLevels,
|
||||||
|
BLOCK_SIZE,
|
||||||
|
state.segmentInfo.getDocCount(),
|
||||||
|
docOut,
|
||||||
|
posOut,
|
||||||
|
payOut);
|
||||||
|
|
||||||
|
encoded = new byte[MAX_ENCODED_SIZE];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates a postings writer with <code>PackedInts.COMPACT</code> */
|
||||||
|
public TempPostingsWriter(SegmentWriteState state) throws IOException {
|
||||||
|
this(state, PackedInts.COMPACT);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start(IndexOutput termsOut) throws IOException {
|
||||||
|
this.termsOut = termsOut;
|
||||||
|
CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
|
||||||
|
termsOut.writeVInt(BLOCK_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setField(FieldInfo fieldInfo) {
|
||||||
|
IndexOptions indexOptions = fieldInfo.getIndexOptions();
|
||||||
|
fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||||
|
fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
fieldHasPayloads = fieldInfo.hasPayloads();
|
||||||
|
skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startTerm() {
|
||||||
|
docTermStartFP = docOut.getFilePointer();
|
||||||
|
if (fieldHasPositions) {
|
||||||
|
posTermStartFP = posOut.getFilePointer();
|
||||||
|
if (fieldHasPayloads || fieldHasOffsets) {
|
||||||
|
payTermStartFP = payOut.getFilePointer();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastDocID = 0;
|
||||||
|
lastBlockDocID = -1;
|
||||||
|
// if (DEBUG) {
|
||||||
|
// System.out.println("FPW.startTerm startFP=" + docTermStartFP);
|
||||||
|
// }
|
||||||
|
skipWriter.resetSkip();
|
||||||
|
}

  @Override
  public void startDoc(int docID, int termDocFreq) throws IOException {
    // if (DEBUG) {
    //   System.out.println("FPW.startDoc docID["+docBufferUpto+"]=" + docID);
    // }
    // We have collected a full block of docs and a new doc has arrived:
    // buffer skip data pointing at the completed block.
    if (lastBlockDocID != -1 && docBufferUpto == 0) {
      // if (DEBUG) {
      //   System.out.println("  bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
      // }
      skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
    }

    final int docDelta = docID - lastDocID;

    if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
      throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + ") (docOut: " + docOut + ")");
    }

    docDeltaBuffer[docBufferUpto] = docDelta;
    // if (DEBUG) {
    //   System.out.println("  docDeltaBuffer[" + docBufferUpto + "]=" + docDelta);
    // }
    if (fieldHasFreqs) {
      freqBuffer[docBufferUpto] = termDocFreq;
    }
    docBufferUpto++;
    docCount++;

    if (docBufferUpto == BLOCK_SIZE) {
      // if (DEBUG) {
      //   System.out.println("  write docDelta block @ fp=" + docOut.getFilePointer());
      // }
      forUtil.writeBlock(docDeltaBuffer, encoded, docOut);
      if (fieldHasFreqs) {
        // if (DEBUG) {
        //   System.out.println("  write freq block @ fp=" + docOut.getFilePointer());
        // }
        forUtil.writeBlock(freqBuffer, encoded, docOut);
      }
      // NOTE: don't set docBufferUpto back to 0 here;
      // finishDoc will do so (because it needs to see that
      // the block was filled so it can save skip data)
    }

    lastDocID = docID;
    lastPosition = 0;
    lastStartOffset = 0;
  }
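
  // Worked example (assuming the Lucene41-style block size of 128 that this
  // format mirrors): a term appearing in 300 docs yields two ForUtil-packed
  // blocks of 128 deltas each, while the remaining 44 deltas stay buffered
  // and become the vInt tail that finishTerm writes.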

  /** Add a new position & payload */
  @Override
  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
    // if (DEBUG) {
    //   System.out.println("FPW.addPosition pos=" + position + " posBufferUpto=" + posBufferUpto + (fieldHasPayloads ? " payloadByteUpto=" + payloadByteUpto: ""));
    // }
    posDeltaBuffer[posBufferUpto] = position - lastPosition;
    if (fieldHasPayloads) {
      if (payload == null || payload.length == 0) {
        // no payload
        payloadLengthBuffer[posBufferUpto] = 0;
      } else {
        payloadLengthBuffer[posBufferUpto] = payload.length;
        if (payloadByteUpto + payload.length > payloadBytes.length) {
          payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payload.length);
        }
        System.arraycopy(payload.bytes, payload.offset, payloadBytes, payloadByteUpto, payload.length);
        payloadByteUpto += payload.length;
      }
    }

    if (fieldHasOffsets) {
      assert startOffset >= lastStartOffset;
      assert endOffset >= startOffset;
      offsetStartDeltaBuffer[posBufferUpto] = startOffset - lastStartOffset;
      offsetLengthBuffer[posBufferUpto] = endOffset - startOffset;
      lastStartOffset = startOffset;
    }

    posBufferUpto++;
    lastPosition = position;
    if (posBufferUpto == BLOCK_SIZE) {
      // if (DEBUG) {
      //   System.out.println("  write pos bulk block @ fp=" + posOut.getFilePointer());
      // }
      forUtil.writeBlock(posDeltaBuffer, encoded, posOut);

      if (fieldHasPayloads) {
        forUtil.writeBlock(payloadLengthBuffer, encoded, payOut);
        payOut.writeVInt(payloadByteUpto);
        payOut.writeBytes(payloadBytes, 0, payloadByteUpto);
        payloadByteUpto = 0;
      }
      if (fieldHasOffsets) {
        forUtil.writeBlock(offsetStartDeltaBuffer, encoded, payOut);
        forUtil.writeBlock(offsetLengthBuffer, encoded, payOut);
      }
      posBufferUpto = 0;
    }
  }
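
  // Layout of one full block on the .pay stream, as written above: packed
  // payload lengths, then a vInt byte count plus the raw payload bytes, then
  // packed offset start-deltas and offset lengths. Resetting payloadByteUpto
  // keeps the byte buffer bounded by one block's worth of payloads.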

  @Override
  public void finishDoc() throws IOException {
    // Since we don't know the df for the current term up front, we buffer
    // skip data for each completed block and write it to the skip file
    // when the next doc arrives (see startDoc).
    if (docBufferUpto == BLOCK_SIZE) {
      lastBlockDocID = lastDocID;
      if (posOut != null) {
        if (payOut != null) {
          lastBlockPayFP = payOut.getFilePointer();
        }
        lastBlockPosFP = posOut.getFilePointer();
        lastBlockPosBufferUpto = posBufferUpto;
        lastBlockPayloadByteUpto = payloadByteUpto;
      }
      // if (DEBUG) {
      //   System.out.println("  docBufferUpto="+docBufferUpto+" now get lastBlockDocID="+lastBlockDocID+" lastBlockPosFP=" + lastBlockPosFP + " lastBlockPosBufferUpto=" + lastBlockPosBufferUpto + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUpto);
      // }
      docBufferUpto = 0;
    }
  }
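
  // A block's skip entry is only buffered once the first doc of the *next*
  // block arrives (see startDoc): a term that ends exactly on a block
  // boundary never needs a skip point after its last block, and finishTerm
  // omits the skip data entirely when docCount never exceeds one block.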

  private static class PendingTerm {
    public final long docStartFP;
    public final long posStartFP;
    public final long payStartFP;
    public final long skipOffset;
    public final long lastPosBlockOffset;
    public final int singletonDocID;

    public PendingTerm(long docStartFP, long posStartFP, long payStartFP, long skipOffset, long lastPosBlockOffset, int singletonDocID) {
      this.docStartFP = docStartFP;
      this.posStartFP = posStartFP;
      this.payStartFP = payStartFP;
      this.skipOffset = skipOffset;
      this.lastPosBlockOffset = lastPosBlockOffset;
      this.singletonDocID = singletonDocID;
    }
  }

  private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();

  /** Called when we are done adding docs to this term */
  @Override
  public void finishTerm(TermStats stats) throws IOException {
    assert stats.docFreq > 0;

    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    assert stats.docFreq == docCount: stats.docFreq + " vs " + docCount;

    // if (DEBUG) {
    //   System.out.println("FPW.finishTerm docFreq=" + stats.docFreq);
    // }

    // if (DEBUG) {
    //   if (docBufferUpto > 0) {
    //     System.out.println("  write doc/freq vInt block (count=" + docBufferUpto + ") at fp=" + docOut.getFilePointer() + " docTermStartFP=" + docTermStartFP);
    //   }
    // }

    // When docFreq == 1, don't write the single docID/freq to a separate
    // file along with a pointer to it.
    final int singletonDocID;
    if (stats.docFreq == 1) {
      // pulse the singleton docID into the term dictionary; freq is implicitly totalTermFreq
      singletonDocID = docDeltaBuffer[0];
    } else {
      singletonDocID = -1;
      // vInt encode the remaining doc deltas and freqs:
      for(int i=0;i<docBufferUpto;i++) {
        final int docDelta = docDeltaBuffer[i];
        final int freq = freqBuffer[i];
        if (!fieldHasFreqs) {
          docOut.writeVInt(docDelta);
        } else if (freq == 1) {
          docOut.writeVInt((docDelta<<1)|1);
        } else {
          docOut.writeVInt(docDelta<<1);
          docOut.writeVInt(freq);
        }
      }
    }
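
    // Worked example of the packing above: docDelta=5 with freq=1 is the
    // single vInt (5<<1)|1 = 11, while docDelta=5 with freq=3 becomes the
    // two vInts 10 and 3. The low bit tells the reader whether an explicit
    // freq follows.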

    final long lastPosBlockOffset;

    if (fieldHasPositions) {
      // if (DEBUG) {
      //   if (posBufferUpto > 0) {
      //     System.out.println("  write pos vInt block (count=" + posBufferUpto + ") at fp=" + posOut.getFilePointer() + " posTermStartFP=" + posTermStartFP + " hasPayloads=" + fieldHasPayloads + " hasOffsets=" + fieldHasOffsets);
      //   }
      // }

      // totalTermFreq is just the total number of positions (or payloads,
      // or offsets) associated with the current term.
      assert stats.totalTermFreq != -1;
      if (stats.totalTermFreq > BLOCK_SIZE) {
        // record file offset for last pos in last block
        lastPosBlockOffset = posOut.getFilePointer() - posTermStartFP;
      } else {
        lastPosBlockOffset = -1;
      }
      if (posBufferUpto > 0) {
        // TODO: should we send offsets/payloads to
        // .pay...?  seems wasteful (have to store extra
        // vLong for low (< BLOCK_SIZE) DF terms = vast
        // majority)

        // vInt encode the remaining positions/payloads/offsets:
        int lastPayloadLength = -1;  // force first payload length to be written
        int lastOffsetLength = -1;   // force first offset length to be written
        int payloadBytesReadUpto = 0;
        for(int i=0;i<posBufferUpto;i++) {
          final int posDelta = posDeltaBuffer[i];
          if (fieldHasPayloads) {
            final int payloadLength = payloadLengthBuffer[i];
            if (payloadLength != lastPayloadLength) {
              lastPayloadLength = payloadLength;
              posOut.writeVInt((posDelta<<1)|1);
              posOut.writeVInt(payloadLength);
            } else {
              posOut.writeVInt(posDelta<<1);
            }

            // if (DEBUG) {
            //   System.out.println("  i=" + i + " payloadLen=" + payloadLength);
            // }

            if (payloadLength != 0) {
              // if (DEBUG) {
              //   System.out.println("  write payload @ pos.fp=" + posOut.getFilePointer());
              // }
              posOut.writeBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
              payloadBytesReadUpto += payloadLength;
            }
          } else {
            posOut.writeVInt(posDelta);
          }

          if (fieldHasOffsets) {
            // if (DEBUG) {
            //   System.out.println("  write offset @ pos.fp=" + posOut.getFilePointer());
            // }
            int delta = offsetStartDeltaBuffer[i];
            int length = offsetLengthBuffer[i];
            if (length == lastOffsetLength) {
              posOut.writeVInt(delta << 1);
            } else {
              posOut.writeVInt(delta << 1 | 1);
              posOut.writeVInt(length);
              lastOffsetLength = length;
            }
          }
        }

        if (fieldHasPayloads) {
          assert payloadBytesReadUpto == payloadByteUpto;
          payloadByteUpto = 0;
        }
      }
      // if (DEBUG) {
      //   System.out.println("  totalTermFreq=" + stats.totalTermFreq + " lastPosBlockOffset=" + lastPosBlockOffset);
      // }
    } else {
      lastPosBlockOffset = -1;
    }

    long skipOffset;
    if (docCount > BLOCK_SIZE) {
      skipOffset = skipWriter.writeSkip(docOut) - docTermStartFP;

      // if (DEBUG) {
      //   System.out.println("skip packet " + (docOut.getFilePointer() - (docTermStartFP + skipOffset)) + " bytes");
      // }
    } else {
      skipOffset = -1;
      // if (DEBUG) {
      //   System.out.println("  no skip: docCount=" + docCount);
      // }
    }

    long payStartFP;
    if (stats.totalTermFreq >= BLOCK_SIZE) {
      payStartFP = payTermStartFP;
    } else {
      payStartFP = -1;
    }

    // if (DEBUG) {
    //   System.out.println("  payStartFP=" + payStartFP);
    // }

    pendingTerms.add(new PendingTerm(docTermStartFP, posTermStartFP, payStartFP, skipOffset, lastPosBlockOffset, singletonDocID));
    docBufferUpto = 0;
    posBufferUpto = 0;
    lastDocID = 0;
    docCount = 0;
  }
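
  // Each finished term thus leaves behind one PendingTerm carrying its start
  // file pointers, with -1 as the sentinel for anything it lacks (no skip
  // data, no multi-block positions, no singleton doc); flushTermsBlock
  // serializes these into the term dictionary below.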

  private final RAMOutputStream bytesWriter = new RAMOutputStream();

  @Override
  public void flushTermsBlock(int start, int count) throws IOException {

    if (count == 0) {
      termsOut.writeByte((byte) 0);
      return;
    }

    assert start <= pendingTerms.size();
    assert count <= start;

    final int limit = pendingTerms.size() - start + count;

    long lastDocStartFP = 0;
    long lastPosStartFP = 0;
    long lastPayStartFP = 0;
    for(int idx=limit-count; idx<limit; idx++) {
      PendingTerm term = pendingTerms.get(idx);

      if (term.singletonDocID == -1) {
        bytesWriter.writeVLong(term.docStartFP - lastDocStartFP);
        lastDocStartFP = term.docStartFP;
      } else {
        bytesWriter.writeVInt(term.singletonDocID);
      }

      if (fieldHasPositions) {
        bytesWriter.writeVLong(term.posStartFP - lastPosStartFP);
        lastPosStartFP = term.posStartFP;
        if (term.lastPosBlockOffset != -1) {
          bytesWriter.writeVLong(term.lastPosBlockOffset);
        }
        if ((fieldHasPayloads || fieldHasOffsets) && term.payStartFP != -1) {
          bytesWriter.writeVLong(term.payStartFP - lastPayStartFP);
          lastPayStartFP = term.payStartFP;
        }
      }

      if (term.skipOffset != -1) {
        bytesWriter.writeVLong(term.skipOffset);
      }
    }

    termsOut.writeVInt((int) bytesWriter.getFilePointer());
    bytesWriter.writeTo(termsOut);
    bytesWriter.reset();

    // Remove the terms we just wrote:
    pendingTerms.subList(limit-count, limit).clear();
  }
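
  // The start file pointers are delta-coded across the terms of one block:
  // e.g. three terms whose docStartFP values are 100, 250 and 400 are written
  // as the vLongs 100, 150 and 150, which stays compact because terms flushed
  // together land near each other in the .doc file.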

  @Override
  public void close() throws IOException {
    IOUtils.close(docOut, posOut, payOut);
  }
}
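
For reference, a minimal self-contained sketch of the delta/freq packing that finishTerm's vInt tail performs. It assumes Lucene's standard 7-bits-per-byte vInt encoding; the class and method names here are illustrative only, not part of the patch:

import java.io.ByteArrayOutputStream;

public class VIntTailDemo {
  // Standard varint: low 7 bits per byte, high bit set while more bits remain.
  static void writeVInt(ByteArrayOutputStream out, int v) {
    while ((v & ~0x7F) != 0) {
      out.write((v & 0x7F) | 0x80);
      v >>>= 7;
    }
    out.write(v);
  }

  // Encode one (docDelta, freq) pair the way the writer's tail loop does.
  static void encode(ByteArrayOutputStream out, int docDelta, int freq) {
    if (freq == 1) {
      writeVInt(out, (docDelta << 1) | 1);  // low bit set: freq is implicitly 1
    } else {
      writeVInt(out, docDelta << 1);        // low bit clear: explicit freq follows
      writeVInt(out, freq);
    }
  }

  public static void main(String[] args) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    encode(out, 5, 1);  // one byte: (5<<1)|1 = 11
    encode(out, 5, 3);  // two bytes: 10, then 3
    System.out.println(out.size());  // prints 3
  }
}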

@ -0,0 +1,54 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.TermState;

/**
 * Holds all state required for {@link PostingsReaderBase}
 * to produce a {@link DocsEnum} without re-seeking the
 * terms dict.
 */
public class TempTermState extends TermState {
  /** how many docs have this term */
  public int docFreq;
  /** total number of occurrences of this term */
  public long totalTermFreq;

  /** the term's ord in the current block */
  public int termBlockOrd;

  /** Sole constructor. (For invocation by subclass
   *  constructors, typically implicit.) */
  protected TempTermState() {
  }

  @Override
  public void copyFrom(TermState _other) {
    assert _other instanceof TempTermState : "can not copy from " + _other.getClass().getName();
    TempTermState other = (TempTermState) _other;
    docFreq = other.docFreq;
    totalTermFreq = other.totalTermFreq;
    termBlockOrd = other.termBlockOrd;
  }
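
  // copyFrom must transfer every field: term dictionaries clone and recycle
  // TermState instances when caching and restoring seek state, so a field
  // left uncopied would leak values from a previously visited term.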

  @Override
  public String toString() {
    return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd;
  }
}

@ -15,3 +15,4 @@
org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat
org.apache.lucene.codecs.temp.TempPostingsFormat