mirror of https://github.com/apache/lucene.git
LUCENE-3069: writer part
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1499744 13f79535-47bb-0310-9956-ffa450edef68
parent 1e3adfae1b
commit d6e2f4b663
@@ -1,47 +0,0 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.TermState;

public class TempBlockTermState extends TempTermState {
  /** the term's ord in the current block */
  public int termBlockOrd;

  /** Sole constructor. (For invocation by subclass
   *  constructors, typically implicit.) */
  protected TempBlockTermState() {
  }

  @Override
  public TempBlockTermState clone() {
    TempBlockTermState other = (TempBlockTermState) super.clone();
    return other;
  }

  @Override
  public void copyFrom(TermState _other) {
    assert _other instanceof TempBlockTermState : "can not copy from " + _other.getClass().getName();
    super.copyFrom(_other);
    TempBlockTermState other = (TempBlockTermState) _other;
    termBlockOrd = other.termBlockOrd;
  }

  @Override
  public String toString() {
    return super.toString() + " termBlockOrd=" + termBlockOrd;
  }
}
@@ -0,0 +1,282 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.PrintWriter;
import java.io.File;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;

public class TempFSTTermsReader extends FieldsProducer {
  final TempPostingsReaderBase postingsReader;
  final IndexInput in;
  final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();

  public TempFSTTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    this.in = state.directory.openInput(termsFileName, state.context);

    boolean success = false;
    try {
      readHeader(in);
      this.postingsReader.init(in);
      seekDir(in);

      final FieldInfos fieldInfos = state.fieldInfos;
      final int numFields = in.readVInt();
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = in.readVInt();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        long numTerms = in.readVLong();
        long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
        long sumDocFreq = in.readVLong();
        int docCount = in.readVInt();
        int longsSize = in.readVInt();
        FieldReader current = new FieldReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
        FieldReader previous = fields.put(fieldInfo.name, current);
        checkFieldSummary(state.segmentInfo, current, previous);
      }
      success = true;
    } finally {
      if (!success) {
        in.close();
      }
    }
  }

  private int readHeader(IndexInput in) throws IOException {
    return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
                                     TempFSTTermsWriter.TERMS_VERSION_START,
                                     TempFSTTermsWriter.TERMS_VERSION_CURRENT);
  }

  private void seekDir(IndexInput in) throws IOException {
    in.seek(in.length() - 8);
    in.seek(in.readLong());
  }

  private void checkFieldSummary(SegmentInfo info, FieldReader field, FieldReader previous) throws IOException {
    // #docs with field must be <= #docs
    if (field.docCount < 0 || field.docCount > info.getDocCount()) {
      throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
    }
    // #postings must be >= #docs with field
    if (field.sumDocFreq < field.docCount) {
      throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + in + ")");
    }
    // #positions must be >= #postings
    if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
      throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + in + ")");
    }
    if (previous != null) {
      throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + in + ")");
    }
  }

  @Override
  public Iterator<String> iterator() {
    return Collections.unmodifiableSet(fields.keySet()).iterator();
  }

  @Override
  public Terms terms(String field) throws IOException {
    assert field != null;
    return fields.get(field);
  }

  @Override
  public int size() {
    return fields.size();
  }

  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(in, postingsReader);
    } finally {
      fields.clear();
    }
  }

  final class FieldReader extends Terms {
    final FieldInfo fieldInfo;
    final long numTerms;
    final long sumTotalTermFreq;
    final long sumDocFreq;
    final int docCount;
    final int longsSize;
    final FST<TempTermOutputs.TempMetaData> dict;

    FieldReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
      this.longsSize = longsSize;
      this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(longsSize));
      //PrintWriter pw = new PrintWriter(new File("../temp/xxx.txt"));
      //Util.toDot(dict, pw, false, false);
    }

    // nocommit: implement intersect
    // nocommit: why do we need this comparator overridden again and again?
    @Override
    public Comparator<BytesRef> getComparator() {
      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    @Override
    public TermsEnum iterator(TermsEnum reuse) throws IOException {
      return new SegmentTermsEnum();
    }

    @Override
    public boolean hasOffsets() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    }

    @Override
    public boolean hasPositions() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    }

    @Override
    public boolean hasPayloads() {
      return fieldInfo.hasPayloads();
    }

    @Override
    public long size() {
      return numTerms;
    }

    @Override
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

    @Override
    public long getSumDocFreq() throws IOException {
      return sumDocFreq;
    }

    @Override
    public int getDocCount() throws IOException {
      return docCount;
    }

    // Iterates through terms in this field.
    // NOTE: every method below is still an unimplemented stub; this
    // commit only covers the writer part.
    private final class SegmentTermsEnum extends TermsEnum {
      SegmentTermsEnum() {
      }

      @Override
      public Comparator<BytesRef> getComparator() {
        return BytesRef.getUTF8SortedAsUnicodeComparator();
      }

      @Override
      public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
        return null;
      }

      @Override
      public BytesRef next() throws IOException {
        return null;
      }

      @Override
      public BytesRef term() {
        return null;
      }

      @Override
      public int docFreq() throws IOException {
        return 0;
      }

      @Override
      public long totalTermFreq() throws IOException {
        return 0;
      }

      @Override
      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
        return null;
      }

      @Override
      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
        return null;
      }

      @Override
      public void seekExact(BytesRef target, TermState otherState) {
      }

      @Override
      public TermState termState() throws IOException {
        return null;
      }

      @Override
      public void seekExact(long ord) throws IOException {
      }

      @Override
      public long ord() {
        return 0;
      }
    }
  }
}
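
Editor's note: seekDir() above resolves the field summary through a fixed-size trailer. The writer records the summary's start offset as the final 8 bytes of the file, so the reader seeks to length() - 8, reads the long stored there, and seeks once more. A self-contained sketch of the same scheme, using plain java.io in place of Lucene's Directory/IndexInput APIs (the file name and payload strings are illustrative only):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

public class TrailerSketch {
  public static void main(String[] args) throws IOException {
    long summaryStart;
    // write: [payload][field summary][summaryStart as the final long]
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream("terms.tmp"))) {
      out.writeUTF("...term/postings payload...");
      summaryStart = out.size();          // offset where the summary begins
      out.writeUTF("...field summary...");
      out.writeLong(summaryStart);        // trailer: always the last 8 bytes
    }
    // read: the same two seeks seekDir() performs
    try (RandomAccessFile in = new RandomAccessFile("terms.tmp", "r")) {
      in.seek(in.length() - 8);           // jump to the trailer
      in.seek(in.readLong());             // follow it to the summary
      System.out.println(in.readUTF());   // prints "...field summary..."
    }
  }
}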
@@ -0,0 +1,198 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Comparator;

import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.CodecUtil;

/** FST-based term dictionary, with all of the term
 *  metadata held as the output of the FST. */
public class TempFSTTermsWriter extends FieldsConsumer {
  static final String TERMS_EXTENSION = "tmp";
  static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
  public static final int TERMS_VERSION_START = 0;
  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;

  final TempPostingsWriterBase postingsWriter;
  final FieldInfos fieldInfos;
  final IndexOutput out;
  final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();

  public TempFSTTermsWriter(SegmentWriteState state, TempPostingsWriterBase postingsWriter) throws IOException {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);

    this.postingsWriter = postingsWriter;
    this.fieldInfos = state.fieldInfos;
    this.out = state.directory.createOutput(termsFileName, state.context);

    // nocommit: why try catch here? not catching createOutput?
    boolean success = false;
    try {
      writeHeader(out);
      this.postingsWriter.start(out);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(out);
      }
    }
  }

  private void writeHeader(IndexOutput out) throws IOException {
    CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
  }

  private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
    out.writeLong(dirStart);
  }

  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    return new TermsWriter(field);
  }

  @Override
  public void close() throws IOException {
    IOException ioe = null;
    try {
      // write field summary
      final long dirStart = out.getFilePointer();

      out.writeVInt(fields.size());
      for (FieldMetaData field : fields) {
        out.writeVInt(field.fieldInfo.number);
        out.writeVLong(field.numTerms);
        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
          out.writeVLong(field.sumTotalTermFreq);
        }
        out.writeVLong(field.sumDocFreq);
        out.writeVInt(field.docCount);
        out.writeVInt(field.longsSize);
        field.dict.save(out);
      }
      writeTrailer(out, dirStart);
    } catch (IOException ioe2) {
      ioe = ioe2;
    } finally {
      IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
    }
  }

  private static class FieldMetaData {
    public final FieldInfo fieldInfo;
    public final long numTerms;
    public final long sumTotalTermFreq;
    public final long sumDocFreq;
    public final int docCount;
    public final int longsSize;
    public final FST<TempTermOutputs.TempMetaData> dict;

    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempMetaData> fst) {
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
      this.longsSize = longsSize;
      this.dict = fst;
    }
  }

  class TermsWriter extends TermsConsumer {
    private final Builder<TempTermOutputs.TempMetaData> builder;
    private final TempTermOutputs outputs;
    private final FieldInfo fieldInfo;
    private final int longsSize;
    private long numTerms;

    private final IntsRef scratchTerm = new IntsRef();
    private final RAMOutputStream metaWriter = new RAMOutputStream();

    TermsWriter(FieldInfo fieldInfo) {
      this.numTerms = 0;
      this.fieldInfo = fieldInfo;
      this.longsSize = postingsWriter.setField(fieldInfo);
      this.outputs = new TempTermOutputs(longsSize);
      this.builder = new Builder<TempTermOutputs.TempMetaData>(FST.INPUT_TYPE.BYTE1, outputs);
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    @Override
    public PostingsConsumer startTerm(BytesRef text) throws IOException {
      postingsWriter.startTerm();
      return postingsWriter;
    }

    @Override
    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
      final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData();
      meta.longs = new long[longsSize];
      meta.bytes = null;
      postingsWriter.finishTerm(meta.longs, metaWriter, stats);
      /*
      meta.bytes = new byte[(int) metaWriter.getFilePointer()];
      metaWriter.writeTo(meta.bytes, 0);
      metaWriter.reset();
      */
      int bytesSize = (int) metaWriter.getFilePointer();
      if (bytesSize > 0) {
        meta.bytes = new byte[bytesSize];
        metaWriter.writeTo(meta.bytes, 0);
        metaWriter.reset();
      }
      //System.out.println("add term:<"+text.utf8ToString()+", "+meta+">");
      builder.add(Util.toIntsRef(text, scratchTerm), meta);
      numTerms++;
    }

    @Override
    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
      // save FST dict
      if (numTerms > 0) {
        final FST<TempTermOutputs.TempMetaData> fst = builder.finish();
        fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
      }
    }
  }
}
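
Editor's note: reading this writer's close() together with the reader's constructor and seekDir(), the implied layout of the .tmp terms file is (a summary sketch, not an authoritative spec):

    CodecUtil header                       writeHeader() / readHeader()
    postings metadata and term data        postingsWriter.start(out) / postingsReader.init(in)
    field summary (begins at dirStart):    written in close(), read after seekDir()
      numFields, then per field:
        fieldNumber, numTerms,
        sumTotalTermFreq (skipped for DOCS_ONLY fields),
        sumDocFreq, docCount, longsSize,
        the field's serialized FST (dict.save(out))
    dirStart as a long                     the file's last 8 bytes (writeTrailer)

The conditional sumTotalTermFreq is why the reader's loop skips that VLong for DOCS_ONLY fields.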
@@ -384,7 +384,10 @@ public final class TempPostingsFormat extends PostingsFormat {
  /** Creates {@code TempPostingsFormat} with default
   * settings. */
  public TempPostingsFormat() {
-   this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+   super("TempFST");
+   minTermBlockSize = 0;
+   maxTermBlockSize = 0;
+   //this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
  }

  /** Creates {@code TempPostingsFormat} with custom
@@ -410,10 +413,11 @@ public final class TempPostingsFormat extends PostingsFormat {

  boolean success = false;
  try {
-   FieldsConsumer ret = new TempBlockTermsWriter(state,
-                                                 postingsWriter,
-                                                 minTermBlockSize,
-                                                 maxTermBlockSize);
+   //FieldsConsumer ret = new TempBlockTermsWriter(state,
+   //                                              postingsWriter,
+   //                                              minTermBlockSize,
+   //                                              maxTermBlockSize);
+   FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
    success = true;
    return ret;
  } finally {
@@ -432,13 +436,14 @@ public final class TempPostingsFormat extends PostingsFormat {
                                                state.segmentSuffix);
  boolean success = false;
  try {
-   FieldsProducer ret = new TempBlockTermsReader(state.directory,
-                                                 state.fieldInfos,
-                                                 state.segmentInfo,
-                                                 postingsReader,
-                                                 state.context,
-                                                 state.segmentSuffix,
-                                                 state.termsIndexDivisor);
+   //FieldsProducer ret = new TempBlockTermsReader(state.directory,
+   //                                              state.fieldInfos,
+   //                                              state.segmentInfo,
+   //                                              postingsReader,
+   //                                              state.context,
+   //                                              state.segmentSuffix,
+   //                                              state.termsIndexDivisor);
+   FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
    success = true;
    return ret;
  } finally {
@@ -0,0 +1,274 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.LongsRef;

// NOTE: outputs should be per-field, since
// longsSize is fixed for each field
public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
  private final static TempMetaData NO_OUTPUT = new TempMetaData();
  private static boolean DEBUG = false;
  private int longsSize;

  public static class TempMetaData {
    public long[] longs;
    public byte[] bytes;
    TempMetaData() {
      this.longs = null;
      this.bytes = null;
    }
    TempMetaData(long[] longs, byte[] bytes) {
      this.longs = longs;
      this.bytes = bytes;
    }
    @Override
    public int hashCode() {
      int hash = 0;
      if (longs != null) {
        final int end = longs.length;
        for (int i = 0; i < end; i++) {
          hash -= longs[i];
        }
      }
      if (bytes != null) {
        hash = -hash;
        final int end = bytes.length;
        for (int i = 0; i < end; i++) {
          hash += bytes[i];
        }
      }
      return hash;
    }
    @Override
    public String toString() {
      if (this == NO_OUTPUT) {
        return "no_output";
      }
      StringBuffer sb = new StringBuffer();
      if (longs != null) {
        sb.append("[ ");
        for (int i = 0; i < longs.length; i++) {
          sb.append(longs[i] + " ");
        }
        sb.append("]");
      } else {
        sb.append("null");
      }
      if (bytes != null) {
        sb.append(" [ ");
        for (int i = 0; i < bytes.length; i++) {
          sb.append(bytes[i] + " ");
        }
        sb.append("]");
      } else {
        sb.append(" null");
      }
      return sb.toString();
    }
  }

  private TempTermOutputs() {
  }

  protected TempTermOutputs(int longsSize) {
    this.longsSize = longsSize;
  }

  @Override
  //
  // Since the longs blob has fixed length, when the two outputs are 'comparable',
  // i.e. every value in one long[] is >= the corresponding value in the other,
  // the element-wise smaller one will be the result.
  //
  // NOTE: only long[] is 'shared', i.e. after sharing the common values,
  // the output of the smaller one becomes an all-zero long[] with its
  // original byte[] blob.
  //
  // nocommit: Builder.add() doesn't immediately consume the output data,
  // which means the longs after one add() should all be deeply copied
  // instead of being reused? quite hairy to detect it here, so the caller
  // must be careful about this.
  //
  public TempMetaData common(TempMetaData t1, TempMetaData t2) {
    if (DEBUG) System.out.print("common(" + t1 + ", " + t2 + ") = ");
    if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + NO_OUTPUT);
      return NO_OUTPUT;
    }
    assert t1.longs != null;
    assert t2.longs != null;
    assert t1.longs.length == t2.longs.length;

    long accum = 0;
    long[] longs1 = t1.longs, longs2 = t2.longs;
    int pos = 0;
    boolean order = true;
    TempMetaData ret;

    while (pos < longsSize && longs1[pos] == longs2[pos]) {
      pos++;
    }
    if (pos < longsSize) {
      // unequal
      order = (longs1[pos] > longs2[pos]);
      if (order) {
        // check whether longs1 >= longs2, element-wise
        while (pos < longsSize && longs1[pos] >= longs2[pos]) {
          accum += longs2[pos];
          pos++;
        }
      } else {
        // check whether longs1 <= longs2, element-wise
        while (pos < longsSize && longs1[pos] <= longs2[pos]) {
          accum += longs1[pos];
          pos++;
        }
      }
      if (pos < longsSize || accum == 0) {
        ret = NO_OUTPUT;
      } else if (order) {
        ret = new TempMetaData(longs2, null);
      } else {
        ret = new TempMetaData(longs1, null);
      }
    } else {
      // equal
      if (t1.bytes != null && Arrays.equals(t1.bytes, t2.bytes)) { // all fields are equal
        ret = t1;
      } else if (accum == 0) { // all-zero case
        ret = NO_OUTPUT;
      } else {
        ret = new TempMetaData(longs1, null);
      }
    }
    if (DEBUG) System.out.println("ret:" + ret);
    return ret;
  }

  @Override
  // nocommit:
  // this *actually* always assumes that t2 <= t1 before the method is called
  public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
    if (DEBUG) System.out.print("subtract(" + t1 + ", " + t2 + ") = ");
    if (t2 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + t1);
      return t1;
    }
    assert t1.longs != null;
    assert t2.longs != null;

    int pos = 0;
    long diff = 0;
    long[] share = new long[longsSize]; // nocommit: reuse

    while (pos < longsSize) {
      share[pos] = t1.longs[pos] - t2.longs[pos];
      diff += share[pos];
      pos++;
    }

    TempMetaData ret;
    if (diff == 0 && (t1.bytes == null || t1.bytes.length == 0)) {
      ret = NO_OUTPUT;
    } else {
      ret = new TempMetaData(share, t1.bytes);
    }
    if (DEBUG) System.out.println("ret:" + ret);
    return ret;
  }

  @Override
  // nocommit: need to check all-zero case?
  // so we can reuse one long[]
  public TempMetaData add(TempMetaData t1, TempMetaData t2) {
    if (DEBUG) System.out.print("add(" + t1 + ", " + t2 + ") = ");
    // nocommit: necessary?
    if (t1 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + t2);
      return t2;
    } else if (t2 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + t1);
      return t1;
    }
    assert t1.longs != null;
    assert t2.longs != null;

    int pos = 0;
    long[] accum = new long[longsSize]; // nocommit: reuse
    while (pos < longsSize) {
      accum[pos] = t1.longs[pos] + t2.longs[pos];
      assert(accum[pos] >= 0);
      pos++;
    }
    TempMetaData ret;
    if (t2.bytes != null) {
      ret = new TempMetaData(accum, t2.bytes);
    } else {
      ret = new TempMetaData(accum, t1.bytes);
    }
    if (DEBUG) System.out.println("ret:" + ret);
    return ret;
  }

  @Override
  public void write(TempMetaData data, DataOutput out) throws IOException {
    for (int pos = 0; pos < longsSize; pos++) {
      out.writeVLong(data.longs[pos]);
    }
    if (data.bytes != null) {
      out.writeVInt(data.bytes.length);
      out.writeBytes(data.bytes, 0, data.bytes.length);
    } else {
      out.writeVInt(0);
    }
  }
  // nocommit: can this non-null bytes case be used in the final output?

  @Override
  public TempMetaData read(DataInput in) throws IOException {
    long[] longs = new long[longsSize];
    for (int pos = 0; pos < longsSize; pos++) {
      longs[pos] = in.readVLong();
    }
    int bytesSize = in.readVInt();
    byte[] bytes = null;
    if (bytesSize > 0) {
      bytes = new byte[bytesSize];
      in.readBytes(bytes, 0, bytes.length);
    }
    TempMetaData meta = new TempMetaData(longs, bytes);
    return meta;
  }

  @Override
  public TempMetaData getNoOutput() {
    return NO_OUTPUT;
  }

  @Override
  public String outputToString(TempMetaData data) {
    return data.toString();
  }
}
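
Editor's note: common(), subtract() and add() form the output algebra the FST Builder uses to push shared term metadata toward the root of the automaton. A hand-worked trace with hypothetical values (longsSize = 2, no byte blobs) shows the intended round trip:

    t1.longs = {3, 5}, t2.longs = {2, 4}

    common(t1, t2)      -> {2, 4}     (3 >= 2 and 5 >= 4: t1 dominates element-wise,
                                       so the smaller side is shared; bytes are dropped)
    subtract(t1, {2,4}) -> {1, 1}     (t1's byte[] blob, if any, stays with this residue)
    subtract(t2, {2,4}) -> NO_OUTPUT  (all-zero residue and t2.bytes == null)
    add({2,4}, {1,1})   -> {3, 5}     (traversal re-assembles t1's original longs)

    With t1.longs = {3, 4} and t2.longs = {2, 7}, neither array dominates the
    other (3 > 2 but 4 < 7), so common() returns NO_OUTPUT and nothing is shared.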
@@ -68,7 +68,7 @@ final class NodeHash<T> {
  }

  // hash code for an unfrozen node. This must be identical
- // to the un-frozen case (below)!!
+ // to the frozen case (below)!!
  private long hash(Builder.UnCompiledNode<T> node) {
    final int PRIME = 31;
    //System.out.println("hash unfrozen");