mirror of https://github.com/apache/lucene.git
reader part, support basic enums
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1500391 13f79535-47bb-0310-9956-ffa450edef68
parent d6e2f4b663
commit 9f6db24cee
@@ -56,7 +56,7 @@ import org.apache.lucene.util.packed.PackedInts;
*
* <li>
* <b>Block structure</b>:
* <p>When the postings are long enough, TempPostingsFormat will try to encode most integer data
* <p>When the postings are long enough, TempBlockPostingsFormat will try to encode most integer data
* as a packed block.</p>
* <p>Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed
* blocks, while the remaining 3 are encoded as one VInt block. </p>
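For orientation, the 259-document example above works out as follows with BLOCK_SIZE = 128 (a sketch, not code from this commit):

int packedBlocks = 259 / 128;  // 2 full packed blocks covering the first 256 docIDs
int vIntTail     = 259 % 128;  // 3 remaining docIDs are written as one VInt block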
@@ -159,7 +159,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>SkipFPDelta determines the position of this term's SkipData within the .doc
* file. In particular, it is the length of the TermFreq data.
* SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
* (i.e. 8 in TempPostingsFormat).</li>
* (i.e. 8 in TempBlockPostingsFormat).</li>
* <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
* of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
* single document ID is written to the term dictionary.</li>
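The SingletonDocID shortcut described above can be pictured like this (sketch only; the variable names here are hypothetical, not the writer's actual members):

if (stats.docFreq == 1) {
  singletonDocID = docID;                            // inline the only docID in the term dictionary
} else {
  docFPDelta = docOut.getFilePointer() - lastDocFP;  // otherwise point into the .doc file where the postings live
}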
@@ -239,7 +239,7 @@ import org.apache.lucene.util.packed.PackedInts;
* We use this trick since the definition of skip entry is a little different from base interface.
* In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for
* skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. However,
* in TempPostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
* in TempBlockPostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
* 2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case).
* When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one
* more skip data than TempSkipWriter. </li>
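The off-by-one above can be checked with a quick count, e.g. for DocFreq = 256 and skipInterval = PackedBlockSize = 128 (illustration only):

int expectedByBase = 256 / 128;        // MultiLevelSkipListWriter: entries at postings 128, 256 -> 2 entries expected
int writtenByTemp  = (256 - 1) / 128;  // TempBlockPostingsFormat: entries at postings 129, 257 -> only 129 fits -> 1 entry written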
@@ -352,7 +352,7 @@ import org.apache.lucene.util.packed.PackedInts;
* @lucene.experimental
*/
public final class TempPostingsFormat extends PostingsFormat {
public final class TempBlockPostingsFormat extends PostingsFormat {
/**
* Filename extension for document number, frequencies, and skip data.
* See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
@@ -381,20 +381,17 @@ public final class TempPostingsFormat extends PostingsFormat {
// NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
public final static int BLOCK_SIZE = 128;
/** Creates {@code TempPostingsFormat} with default
/** Creates {@code TempBlockPostingsFormat} with default
* settings. */
public TempPostingsFormat() {
super("TempFST");
minTermBlockSize = 0;
maxTermBlockSize = 0;
//this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
public TempBlockPostingsFormat() {
this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
/** Creates {@code TempPostingsFormat} with custom
/** Creates {@code TempBlockPostingsFormat} with custom
* values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
* @see TempBlockTermsWriter#TempBlockTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int) */
public TempPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
public TempBlockPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
super("TempBlock");
this.minTermBlockSize = minTermBlockSize;
assert minTermBlockSize > 1;
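For orientation, constructing the renamed format looks like this (sketch only; 32/64 are arbitrary example values, not the defaults):

PostingsFormat defaults = new TempBlockPostingsFormat();
PostingsFormat custom   = new TempBlockPostingsFormat(32, 64);  // minTermBlockSize must be > 1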
@@ -413,11 +410,10 @@ public final class TempPostingsFormat extends PostingsFormat {
boolean success = false;
try {
//FieldsConsumer ret = new TempBlockTermsWriter(state,
// postingsWriter,
// minTermBlockSize,
// maxTermBlockSize);
FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
FieldsConsumer ret = new TempBlockTermsWriter(state,
postingsWriter,
minTermBlockSize,
maxTermBlockSize);
success = true;
return ret;
} finally {
@@ -436,14 +432,13 @@ public final class TempPostingsFormat extends PostingsFormat {
state.segmentSuffix);
boolean success = false;
try {
//FieldsProducer ret = new TempBlockTermsReader(state.directory,
// state.fieldInfos,
// state.segmentInfo,
// postingsReader,
// state.context,
// state.segmentSuffix,
// state.termsIndexDivisor);
FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
FieldsProducer ret = new TempBlockTermsReader(state.directory,
state.fieldInfos,
state.segmentInfo,
postingsReader,
state.context,
state.segmentSuffix,
state.termsIndexDivisor);
success = true;
return ret;
} finally {
@@ -0,0 +1,77 @@
package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
public final class TempFSTPostingsFormat extends PostingsFormat {
public TempFSTPostingsFormat() {
super("TempFST");
}
@Override
public String toString() {
return getName();
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
TempPostingsWriterBase postingsWriter = new TempPostingsWriter(state);
boolean success = false;
try {
FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
TempPostingsReaderBase postingsReader = new TempPostingsReader(state.directory,
state.fieldInfos,
state.segmentInfo,
state.context,
state.segmentSuffix);
boolean success = false;
try {
FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsReader);
}
}
}
}
@@ -18,8 +18,6 @@ package org.apache.lucene.codecs.temp;
*/
import java.io.IOException;
import java.io.PrintWriter;
import java.io.File;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@@ -43,23 +41,19 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
public class TempFSTTermsReader extends FieldsProducer {
final TempPostingsReaderBase postingsReader;
final IndexInput in;
final TreeMap<String, FieldReader> fields = new TreeMap<String, FieldReader>();
final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
boolean DEBUG = false;
public TempFSTTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);
@@ -83,8 +77,8 @@ public class TempFSTTermsReader extends FieldsProducer {
long sumDocFreq = in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
FieldReader current = new FieldReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
FieldReader previous = fields.put(fieldInfo.name, current);
TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
TermsReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, current, previous);
}
success = true;
@@ -96,7 +90,8 @@ public class TempFSTTermsReader extends FieldsProducer {
}
private int readHeader(IndexInput in) throws IOException {
return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
return CodecUtil.checkHeader(in,
TempFSTTermsWriter.TERMS_CODEC_NAME,
TempFSTTermsWriter.TERMS_VERSION_START,
TempFSTTermsWriter.TERMS_VERSION_CURRENT);
}
@@ -104,7 +99,7 @@ public class TempFSTTermsReader extends FieldsProducer {
in.seek(in.length() - 8);
in.seek(in.readLong());
}
private void checkFieldSummary(SegmentInfo info, FieldReader field, FieldReader previous) throws IOException {
private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
@@ -147,7 +142,7 @@ public class TempFSTTermsReader extends FieldsProducer {
}
}
final class FieldReader extends Terms {
final class TermsReader extends Terms {
final FieldInfo fieldInfo;
final long numTerms;
final long sumTotalTermFreq;
@@ -156,16 +151,14 @@ public class TempFSTTermsReader extends FieldsProducer {
final int longsSize;
final FST<TempTermOutputs.TempMetaData> dict;
FieldReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(longsSize));
//PrintWriter pw = new PrintWriter(new File("../temp/xxx.txt"));
//Util.toDot(dict, pw, false, false);
this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(fieldInfo, longsSize));
}
// nocommit: implement intersect
@@ -216,8 +209,32 @@ public class TempFSTTermsReader extends FieldsProducer {
}
// Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum {
SegmentTermsEnum() {
final class SegmentTermsEnum extends TermsEnum {
final BytesRefFSTEnum<TempTermOutputs.TempMetaData> fstEnum;
/* Current term, null when enum ends or unpositioned */
BytesRef term;
/* Current term stats + decoded metadata (customized by PBF) */
final TempTermState state;
/* Current term stats + undecoded metadata (long[] & byte[]) */
TempTermOutputs.TempMetaData meta;
ByteArrayDataInput bytesReader;
/* True when current term's metadata is decoded */
boolean decoded;
/* True when current enum is 'positioned' by seekExact(TermState) */
boolean seekPending;
SegmentTermsEnum() throws IOException {
this.fstEnum = new BytesRefFSTEnum<TempTermOutputs.TempMetaData>(dict);
this.state = postingsReader.newTermState();
this.bytesReader = new ByteArrayDataInput();
this.term = null;
this.decoded = false;
this.seekPending = false;
}
@Override
@@ -226,56 +243,115 @@ public class TempFSTTermsReader extends FieldsProducer {
}
@Override
public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
return null;
}
@Override
public BytesRef next() throws IOException {
return null;
public TermState termState() throws IOException {
decodeMetaData();
return state.clone();
}
@Override
public BytesRef term() {
return null;
return term;
}
@Override
public int docFreq() throws IOException {
return 0;
return state.docFreq;
}
@Override
public long totalTermFreq() throws IOException {
return 0;
return state.totalTermFreq;
}
// Let PBF decodes metadata from long[] and byte[]
private void decodeMetaData() throws IOException {
if (!decoded && !seekPending) {
if (meta.bytes != null) {
bytesReader.reset(meta.bytes, 0, meta.bytes.length);
}
postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state);
decoded = true;
}
}
// Update current enum according to FSTEnum
private void updateEnum(final InputOutput<TempTermOutputs.TempMetaData> pair) {
if (pair == null) {
term = null;
} else {
term = pair.input;
meta = pair.output;
state.docFreq = meta.docFreq;
state.totalTermFreq = meta.totalTermFreq;
}
decoded = false;
seekPending = false;
}
// nocommit: reuse?
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
return null;
decodeMetaData();
return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
return null;
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
decodeMetaData();
return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
}
@Override
public BytesRef next() throws IOException {
if (seekPending) { // previously positioned, but termOutputs not fetched
seekPending = false;
if (seekCeil(term, false) != SeekStatus.FOUND) {
return term;
}
}
updateEnum(fstEnum.next());
return term;
}
@Override
public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
updateEnum(fstEnum.seekExact(target));
return term != null;
}
// nocommit: when will we useCache?
@Override
public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
updateEnum(fstEnum.seekCeil(target));
if (term == null) {
return SeekStatus.END;
} else {
return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
}
// nocommit: this method doesn't act as 'seekExact' right?
@Override
public void seekExact(BytesRef target, TermState otherState) {
if (term == null || target.compareTo(term) != 0) {
state.copyFrom(otherState);
term = BytesRef.deepCopyOf(target);
seekPending = true;
}
}
@Override
public TermState termState() throws IOException {
return null;
}
// nocommit: do we need this?
@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long ord() {
return 0;
throw new UnsupportedOperationException();
}
}
}
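For orientation, a typical consumer of the enum above looks like this (sketch only; 'reader' and the field name are hypothetical):

Terms terms = reader.terms("body");          // AtomicReader.terms(String)
TermsEnum termsEnum = terms.iterator(null);
BytesRef term;
while ((term = termsEnum.next()) != null) {
  DocsEnum docs = termsEnum.docs(null, null, DocsEnum.FLAG_FREQS);
  // docs()/docsAndPositions() above call decodeMetaData() and then delegate to postingsReader
}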
@@ -46,6 +46,7 @@ import org.apache.lucene.codecs.CodecUtil;
/** FST based term dict, all the metadata held
* as output of FST */
// nocommit: where is 'TermStats' ???
public class TempFSTTermsWriter extends FieldsConsumer {
static final String TERMS_EXTENSION = "tmp";
static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
@@ -135,7 +136,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
}
}
class TermsWriter extends TermsConsumer {
final class TermsWriter extends TermsConsumer {
private final Builder<TempTermOutputs.TempMetaData> builder;
private final TempTermOutputs outputs;
private final FieldInfo fieldInfo;
@@ -143,13 +144,14 @@ public class TempFSTTermsWriter extends FieldsConsumer {
private long numTerms;
private final IntsRef scratchTerm = new IntsRef();
private final RAMOutputStream statsWriter = new RAMOutputStream();
private final RAMOutputStream metaWriter = new RAMOutputStream();
TermsWriter(FieldInfo fieldInfo) {
this.numTerms = 0;
this.fieldInfo = fieldInfo;
this.longsSize = postingsWriter.setField(fieldInfo);
this.outputs = new TempTermOutputs(longsSize);
this.outputs = new TempTermOutputs(fieldInfo, longsSize);
this.builder = new Builder<TempTermOutputs.TempMetaData>(FST.INPUT_TYPE.BYTE1, outputs);
}
@@ -166,16 +168,14 @@ public class TempFSTTermsWriter extends FieldsConsumer {
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// write term meta data into fst
final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData();
meta.longs = new long[longsSize];
meta.bytes = null;
meta.docFreq = stats.docFreq;
meta.totalTermFreq = stats.totalTermFreq;
postingsWriter.finishTerm(meta.longs, metaWriter, stats);
/*
meta.bytes = new byte[(int)metaWriter.getFilePointer()];
metaWriter.writeTo(meta.bytes, 0);
metaWriter.reset();
*/
int bytesSize = (int)metaWriter.getFilePointer();
final int bytesSize = (int)metaWriter.getFilePointer();
if (bytesSize > 0) {
meta.bytes = new byte[bytesSize];
metaWriter.writeTo(meta.bytes, 0);
@@ -191,6 +191,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
// save FST dict
if (numTerms > 0) {
final FST<TempTermOutputs.TempMetaData> fst = builder.finish();
//fst.dump();
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
}
}
@@ -71,7 +71,7 @@ public final class TempPostingsReader extends TempPostingsReaderBase {
IndexInput posIn = null;
IndexInput payIn = null;
try {
docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempPostingsFormat.DOC_EXTENSION),
docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockPostingsFormat.DOC_EXTENSION),
ioContext);
CodecUtil.checkHeader(docIn,
TempPostingsWriter.DOC_CODEC,
@@ -80,7 +80,7 @@ public final class TempPostingsReader extends TempPostingsReaderBase {
forUtil = new ForUtil(docIn);
if (fieldInfos.hasProx()) {
posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempPostingsFormat.POS_EXTENSION),
posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockPostingsFormat.POS_EXTENSION),
ioContext);
CodecUtil.checkHeader(posIn,
TempPostingsWriter.POS_CODEC,
@@ -88,7 +88,7 @@ public final class TempPostingsReader extends TempPostingsReaderBase {
TempPostingsWriter.VERSION_CURRENT);
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempPostingsFormat.PAY_EXTENSION),
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockPostingsFormat.PAY_EXTENSION),
ioContext);
CodecUtil.checkHeader(payIn,
TempPostingsWriter.PAY_CODEC,
@@ -119,7 +119,7 @@ public final class TempPostingsWriter extends TempPostingsWriterBase {
public TempPostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
super();
docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempPostingsFormat.DOC_EXTENSION),
docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockPostingsFormat.DOC_EXTENSION),
state.context);
IndexOutput posOut = null;
IndexOutput payOut = null;
@@ -129,7 +129,7 @@ public final class TempPostingsWriter extends TempPostingsWriterBase {
forUtil = new ForUtil(acceptableOverheadRatio, docOut);
if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new int[MAX_DATA_SIZE];
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempPostingsFormat.POS_EXTENSION),
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockPostingsFormat.POS_EXTENSION),
state.context);
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
@@ -150,7 +150,7 @@ public final class TempPostingsWriter extends TempPostingsWriterBase {
}
if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempPostingsFormat.PAY_EXTENSION),
payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockPostingsFormat.PAY_EXTENSION),
state.context);
CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
}
@@ -20,6 +20,8 @@ package org.apache.lucene.codecs.temp;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.Outputs;
@@ -31,18 +33,25 @@ import org.apache.lucene.util.LongsRef;
public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
private final static TempMetaData NO_OUTPUT = new TempMetaData();
private static boolean DEBUG = false;
private FieldInfo fieldInfo;
private int longsSize;
public static class TempMetaData {
public long[] longs;
public byte[] bytes;
int docFreq;
long totalTermFreq;
TempMetaData() {
this.longs = null;
this.bytes = null;
this.docFreq = 0;
this.totalTermFreq = -1;
}
TempMetaData(long[] longs, byte[] bytes) {
TempMetaData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
this.longs = longs;
this.bytes = bytes;
this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq;
}
@Override
public int hashCode() {
@@ -79,12 +88,14 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
if (bytes != null) {
sb.append(" [ ");
for (int i = 0; i < bytes.length; i++) {
sb.append(bytes[i]+" ");
sb.append(Integer.toHexString((int)bytes[i] & 0xff)+" ");
}
sb.append("]");
} else {
sb.append(" null");
}
sb.append(" "+docFreq);
sb.append(" "+totalTermFreq);
return sb.toString();
}
}
@@ -92,7 +103,8 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
private TempTermOutputs() {
}
protected TempTermOutputs(int longsSize) {
protected TempTermOutputs(FieldInfo fieldInfo, int longsSize) {
this.fieldInfo = fieldInfo;
this.longsSize = longsSize;
}
@@ -102,13 +114,13 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
// i.e. when every value in long[] fits the same ordering, the smaller one
// will be the result.
//
// NOTE: only long[] is 'shared', i.e. after sharing common value,
// the output of smaller one will be a all-zero long[] with original byte[] blob.
// NOTE: only long[] is 'shared', i.e. if there are two byte[] on the successive
// arcs, only the last byte[] is valid. (this somewhat saves nodes, but might affect
// compression, since we'll have to load metadata block for other terms as well, currently,
// we don't support this)
//
// nocommit: Builder.add() doesn't immediatelly consumes the output data,
// which means, the longs after one add() should all be deeply copied
// instead of being reused? quite hairly to detect it here, so the caller
// must be careful about this.
// nocommit: get the byte[] from smaller one as well, so that
// byte[] is actually inherited
//
public TempMetaData common(TempMetaData t1, TempMetaData t2) {
if (DEBUG) System.out.print("common("+t1+", "+t2+") = ");
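A small illustration of the sharing rule described in the comments above (an assumption-based reading, not code from this commit):

outputs.common(new TempMetaData(new long[]{4, 9}, null, 0, -1),
               new TempMetaData(new long[]{2, 3}, null, 0, -1));  // -> {2, 3}: one output dominates, the smaller long[] is shared
outputs.common(new TempMetaData(new long[]{3, 5}, null, 0, -1),
               new TempMetaData(new long[]{2, 7}, null, 0, -1));  // -> NO_OUTPUT: no consistent ordering, nothing is shared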
@@ -148,18 +160,18 @@
if (pos < longsSize || accum == 0) {
ret = NO_OUTPUT;
} else if (order) {
ret = new TempMetaData(longs2, null);
ret = new TempMetaData(longs2, null, 0, -1);
} else {
ret = new TempMetaData(longs1, null);
ret = new TempMetaData(longs1, null, 0, -1);
}
} else {
// equal
if (t1.bytes!= null && Arrays.equals(t1.bytes, t2.bytes)) { // all fields are equal
if (t1.bytes!= null && bytesEqual(t1, t2) && statsEqual(t1, t2)) { // all fields are equal
ret = t1;
} else if (accum == 0) { // all zero case
ret = NO_OUTPUT;
} else {
ret = new TempMetaData(longs1, null);
ret = new TempMetaData(longs1, null, 0, -1);
}
}
if (DEBUG) System.out.println("ret:"+ret);
@@ -189,21 +201,27 @@
}
TempMetaData ret;
if (diff == 0 && (t1.bytes == null || t1.bytes.length == 0)) {
if (diff == 0 && bytesEqual(t1, t2) && statsEqual(t1, t2)) {
ret = NO_OUTPUT;
} else {
ret = new TempMetaData(share, t1.bytes);
ret = new TempMetaData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
}
if (DEBUG) System.out.println("ret:"+ret);
return ret;
}
static boolean statsEqual(final TempMetaData t1, final TempMetaData t2) {
return t1.docFreq == t2.docFreq && t1.totalTermFreq == t2.totalTermFreq;
}
static boolean bytesEqual(final TempMetaData t1, final TempMetaData t2) {
return Arrays.equals(t1.bytes, t2.bytes);
}
@Override
// nocommit: need to check all-zero case?
// so we can reuse one long[]
public TempMetaData add(TempMetaData t1, TempMetaData t2) {
if (DEBUG) System.out.print("add("+t1+", "+t2+") = ");
// nocommit: necessary?
if (t1 == NO_OUTPUT) {
if (DEBUG) System.out.println("ret:"+t2);
return t2;
@@ -215,17 +233,17 @@
assert t2.longs != null;
int pos = 0;
long[] accum = new long[longsSize]; // nocommit: reuse
long[] accum = new long[longsSize]; // nocommit: reuse?
while (pos < longsSize) {
accum[pos] = t1.longs[pos] + t2.longs[pos];
assert(accum[pos] >= 0);
pos++;
}
TempMetaData ret;
if (t2.bytes != null) {
ret = new TempMetaData(accum, t2.bytes);
if (t2.bytes != null || t2.docFreq > 0) {
ret = new TempMetaData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
} else {
ret = new TempMetaData(accum, t1.bytes);
ret = new TempMetaData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
}
if (DEBUG) System.out.println("ret:"+ret);
return ret;
@@ -236,14 +254,20 @@
for (int pos = 0; pos < longsSize; pos++) {
out.writeVLong(data.longs[pos]);
}
int code = data.docFreq == 0 ? 0 : 1;
if (data.bytes != null) {
out.writeVInt(data.bytes.length);
out.writeVInt((data.bytes.length << 1) | code);
out.writeBytes(data.bytes, 0, data.bytes.length);
} else {
out.writeVInt(0);
out.writeVInt(code);
}
if (data.docFreq > 0) {
out.writeVInt(data.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
out.writeVLong(data.totalTermFreq - data.docFreq);
}
}
}
// nocommit: can this non-null byte case be used in Final Output?
@Override
public TempMetaData read(DataInput in) throws IOException {
@@ -251,13 +275,22 @@
for (int pos = 0; pos < longsSize; pos++) {
longs[pos] = in.readVLong();
}
int bytesSize = in.readVInt();
int code = in.readVInt();
int bytesSize = code >>> 1;
int docFreq = 0;
long totalTermFreq = -1;
byte[] bytes = null;
if (bytesSize > 0) {
bytes = new byte[bytesSize];
in.readBytes(bytes, 0, bytes.length);
}
TempMetaData meta = new TempMetaData(longs, bytes);
if ((code & 1) == 1) {
docFreq = in.readVInt();
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + in.readVLong();
}
}
TempMetaData meta = new TempMetaData(longs, bytes, docFreq, totalTermFreq);
return meta;
}
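A note on the packing used by write()/read() above: the low bit of the VInt records whether term stats follow, and the remaining bits carry the byte[] length, so the two sides round-trip as (illustration only):

int code = (bytes.length << 1) | (docFreq == 0 ? 0 : 1);  // writer side
int bytesSize = code >>> 1;                               // reader side: byte[] length
boolean hasStats = (code & 1) == 1;                       // reader side: docFreq (and totalTermFreq delta) follow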
@@ -15,4 +15,5 @@
org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat
org.apache.lucene.codecs.temp.TempPostingsFormat
org.apache.lucene.codecs.temp.TempBlockPostingsFormat
org.apache.lucene.codecs.temp.TempFSTPostingsFormat
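Because the formats are registered through this service provider file, they can be looked up by the names passed to super() in their constructors (usage sketch, not part of the commit):

PostingsFormat block = PostingsFormat.forName("TempBlock");
PostingsFormat fst   = PostingsFormat.forName("TempFST");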
@@ -1015,7 +1015,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
// Don't proceed if picked Codec is in the list of illegal ones.
final String format = _TestUtil.getPostingsFormat("f");
assumeFalse("Format: " + format + " does not support ReaderTermsIndexDivisor!",
(format.equals("SimpleText") || format.equals("Memory") || format.equals("Direct")));
(format.equals("SimpleText") || format.equals("Memory") || format.equals("Direct") || format.equals("TempFST")));
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, conf);