LUCENE-5675: initial fork of BT with versioning added

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595064 13f79535-47bb-0310-9956-ffa450edef68
2014-05-15 22:28:48 +00:00 · 2014-05-15 22:28:48 +00:00 · 491dc6a716
parent 1ea0ce4062
commit 491dc6a716
10 changed files with 3441 additions and 31 deletions
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
@ -17,8 +17,6 @@ package org.apache.lucene.codecs.idversion;
 * limitations under the License.
 */

-import static org.apache.lucene.codecs.idversion.IDVersionPostingsWriter.IDVersionTermState;
-
 import java.io.IOException;

 import org.apache.lucene.codecs.BlockTermState;
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
@ -43,26 +43,6 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
  private int lastPosition;
  private long lastVersion;

-  final static class IDVersionTermState extends BlockTermState {
-    long idVersion;
-    int docID;
-
-    @Override
-    public IDVersionTermState clone() {
-      IDVersionTermState other = new IDVersionTermState();
-      other.copyFrom(this);
-      return other;
-    }
-
-    @Override
-    public void copyFrom(TermState _other) {
-      super.copyFrom(_other);
-      IDVersionTermState other = (IDVersionTermState) _other;
-      idVersion = other.idVersion;
-      docID = other.docID;
-    }
-  }
-
  @Override
  public IDVersionTermState newTermState() {
    return new IDVersionTermState();
@ -144,8 +124,8 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
  @Override
  public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
    IDVersionTermState state = (IDVersionTermState) _state;
-    // nocommit must send version up to FST somehow ...
    out.writeVInt(state.docID);
+    out.writeVLong(state.idVersion);
  }

  @Override
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
@ -0,0 +1,971 @@
+package org.apache.lucene.codecs.idversion;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Outputs;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.Util;
+
+/** Iterates through terms in this field */
+final class IDVersionSegmentTermsEnum extends TermsEnum {
+
+  final static Outputs<Pair<BytesRef,Long>> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs();
+  final static Pair<BytesRef,Long> NO_OUTPUT = fstOutputs.getNoOutput();
+
+  // Lazy init:
+  IndexInput in;
+
+  private IDVersionSegmentTermsEnumFrame[] stack;
+  private final IDVersionSegmentTermsEnumFrame staticFrame;
+  IDVersionSegmentTermsEnumFrame currentFrame;
+  boolean termExists;
+  final VersionFieldReader fr;
+
+  // nocommit make this public "for casting" and add a getVersion method?
+
+  private int targetBeforeCurrentLength;
+
+  private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
+
+  // What prefix of the current term was present in the index:
+  private int validIndexPrefix;
+
+  // assert only:
+  private boolean eof;
+
+  final BytesRef term = new BytesRef();
+  private final FST.BytesReader fstReader;
+
+  @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<Pair<BytesRef,Long>>[] arcs =
+  new FST.Arc[1];
+
+  /**
+   * Creates an enum over the terms of {@code fr}.  No block is loaded here:
+   * the enum starts out parked on the static frame, and the frame stack is
+   * grown on demand.
+   */
+  public IDVersionSegmentTermsEnum(VersionFieldReader fr) throws IOException {
+    this.fr = fr;
+
+    // Frames are allocated lazily by getFrame
+    stack = new IDVersionSegmentTermsEnumFrame[0];
+
+    // Used to hold seek by TermState, or cached seek
+    staticFrame = new IDVersionSegmentTermsEnumFrame(this, -1);
+
+    // An FST reader is only available when the terms index is present
+    fstReader = fr.index == null ? null : fr.index.getBytesReader();
+
+    // Pre-allocate the reusable arcs
+    int arcIdx = 0;
+    while (arcIdx < arcs.length) {
+      arcs[arcIdx] = new FST.Arc<>();
+      arcIdx++;
+    }
+
+    if (fr.index != null) {
+      final FST.Arc<Pair<BytesRef,Long>> rootArc = fr.index.getFirstArc(arcs[0]);
+      // Empty string prefix must have an output in the index!
+      assert rootArc.isFinal();
+    }
+
+    // Init w/ the static frame; the root block is only pushed once the
+    // enum is first positioned
+    currentFrame = staticFrame;
+    validIndexPrefix = 0;
+  }
+      
+  // Clones the parent reader's input the first time it is needed; later
+  // calls are no-ops.
+  // Not private to avoid synthetic access$NNN methods
+  void initIndexInput() {
+    if (in != null) {
+      // Already cloned
+      return;
+    }
+    in = fr.parent.in.clone();
+  }
+
+  // Returns the frame at stack position ord, first growing the stack (and
+  // allocating a frame for every new slot) when ord is past its current end.
+  private IDVersionSegmentTermsEnumFrame getFrame(int ord) throws IOException {
+    final int oldLength = stack.length;
+    if (ord >= oldLength) {
+      final int newLength = ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+      final IDVersionSegmentTermsEnumFrame[] grown = new IDVersionSegmentTermsEnumFrame[newLength];
+      System.arraycopy(stack, 0, grown, 0, oldLength);
+      for(int slot=oldLength;slot<newLength;slot++) {
+        grown[slot] = new IDVersionSegmentTermsEnumFrame(this, slot);
+      }
+      stack = grown;
+    }
+    final IDVersionSegmentTermsEnumFrame frame = stack[ord];
+    assert frame.ord == ord;
+    return frame;
+  }
+
+  // Returns the reusable arc at position ord, first growing the arcs array
+  // (and allocating an arc for every new slot) when ord is past its current end.
+  private FST.Arc<Pair<BytesRef,Long>> getArc(int ord) {
+    final int oldLength = arcs.length;
+    if (ord >= oldLength) {
+      @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<Pair<BytesRef,Long>>[] grown =
+      new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+      System.arraycopy(arcs, 0, grown, 0, oldLength);
+      for(int slot=oldLength;slot<grown.length;slot++) {
+        grown[slot] = new FST.Arc<>();
+      }
+      arcs = grown;
+    }
+    return arcs[ord];
+  }
+
+  // Pushes a frame we seek'd to: decodes the block's file pointer and flags
+  // from the first output of frameData, records them on the frame just above
+  // currentFrame, then delegates to the fp-based pushFrame.
+  IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, Pair<BytesRef,Long> frameData, int length) throws IOException {
+    final BytesRef encoded = frameData.output1;
+    scratchReader.reset(encoded.bytes, encoded.offset, encoded.length);
+    final long code = scratchReader.readVLong();
+
+    final IDVersionSegmentTermsEnumFrame frame = getFrame(1+currentFrame.ord);
+    final boolean blockHasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
+    frame.hasTerms = blockHasTerms;
+    frame.hasTermsOrig = blockHasTerms;
+    frame.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
+    if (frame.isFloor) {
+      // The floor data follows the code in the same output bytes
+      frame.setFloorData(scratchReader, encoded);
+    }
+
+    // The bits above the flags hold the block's file pointer
+    pushFrame(arc, code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS, length);
+
+    return frame;
+  }
+
+  // Pushes next'd frame or seek'd frame; we later
+  // lazy-load the frame only when needed.  If the frame above currentFrame
+  // already holds this same block (same fp, nextEnt set), its state is kept
+  // and only rewound when required; otherwise the frame is reset for fp.
+  IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, long fp, int length) throws IOException {
+    final IDVersionSegmentTermsEnumFrame frame = getFrame(1+currentFrame.ord);
+    frame.arc = arc;
+
+    final boolean sameBlock = frame.fpOrig == fp && frame.nextEnt != -1;
+    if (!sameBlock) {
+      // New frame: reset it so it is (lazily) loaded from fp
+      frame.nextEnt = -1;
+      frame.prefix = length;
+      frame.state.termBlockOrd = 0;
+      frame.fp = fp;
+      frame.fpOrig = fp;
+      frame.lastSubFP = -1;
+      return frame;
+    }
+
+    // Reused frame: rewind only if its prefix is longer than
+    // targetBeforeCurrentLength, else skip the rewind
+    if (frame.prefix > targetBeforeCurrentLength) {
+      frame.rewind();
+    }
+    assert length == frame.prefix;
+
+    return frame;
+  }
+
+  // asserts only: clears the eof marker.  Always returns true so the call
+  // can be written as "assert clearEOF();" and costs nothing when assertions
+  // are disabled.
+  private boolean clearEOF() {
+    eof = false;
+    return true;
+  }
+
+  // asserts only: sets the eof marker.  Always returns true so the call can
+  // be placed inside an assert statement.
+  private boolean setEOF() {
+    eof = true;
+    return true;
+  }
+
+  // nocommit we need a seekExact(BytesRef target, long minVersion) API?
+
+  /** Seeks to exactly {@code target}; delegates to
+   *  {@link #seekExact(BytesRef, long)} with a minimum version of 0. */
+  @Override
+  public boolean seekExact(final BytesRef target) throws IOException {
+    return seekExact(target, 0);
+  }
+
+  /** Returns false if the term deos not exist, or it exists but its version is < minIDVersion. */
+  public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
+
+    if (fr.index == null) {
+      throw new IllegalStateException("terms index was not loaded");
+    }
+
+    if (term.bytes.length <= target.length) {
+      term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
+    }
+
+    assert clearEOF();
+
+    // if (DEBUG) {
+    //   System.out.println("\nBTTR.seekExact seg=" + segment + " target=" + fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+    //   printSeekState();
+    // }
+
+    FST.Arc<Pair<BytesRef,Long>> arc;
+    int targetUpto;
+    Pair<BytesRef,Long> output;
+
+    targetBeforeCurrentLength = currentFrame.ord;
+
+    if (currentFrame != staticFrame) {
+
+      // We are already seek'd; find the common
+      // prefix of new seek term vs current term and
+      // re-use the corresponding seek state.  For
+      // example, if app first seeks to foobar, then
+      // seeks to foobaz, we can re-use the seek state
+      // for the first 5 bytes.
+
+      // if (DEBUG) {
+      //   System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
+      // }
+
+      arc = arcs[0];
+      assert arc.isFinal();
+      output = arc.output;
+      targetUpto = 0;
+
+      IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
+      assert validIndexPrefix <= term.length;
+
+      final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+      int cmp = 0;
+
+      // TODO: reverse vLong byte order for better FST
+      // prefix output sharing
+
+      // First compare up to valid seek frames:
+      while (targetUpto < targetLimit) {
+        cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+        // if (DEBUG) {
+        //   System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
+        // }
+        if (cmp != 0) {
+          break;
+        }
+        arc = arcs[1+targetUpto];
+        //if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
+        //System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
+        //}
+        assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+        if (arc.output != NO_OUTPUT) {
+          output = fstOutputs.add(output, arc.output);
+        }
+        if (arc.isFinal()) {
+          lastFrame = stack[1+lastFrame.ord];
+        }
+        targetUpto++;
+      }
+
+      if (cmp == 0) {
+        final int targetUptoMid = targetUpto;
+
+        // Second compare the rest of the term, but
+        // don't save arc/output/frame; we only do this
+        // to find out if the target term is before,
+        // equal or after the current term
+        final int targetLimit2 = Math.min(target.length, term.length);
+        while (targetUpto < targetLimit2) {
+          cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+          // if (DEBUG) {
+          //   System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+          // }
+          if (cmp != 0) {
+            break;
+          }
+          targetUpto++;
+        }
+
+        if (cmp == 0) {
+          cmp = term.length - target.length;
+        }
+        targetUpto = targetUptoMid;
+      }
+
+      if (cmp < 0) {
+        // Common case: target term is after current
+        // term, ie, app is seeking multiple terms
+        // in sorted order
+        // if (DEBUG) {
+        //   System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
+        // }
+        currentFrame = lastFrame;
+
+      } else if (cmp > 0) {
+        // Uncommon case: target term
+        // is before current term; this means we can
+        // keep the currentFrame but we must rewind it
+        // (so we scan from the start)
+        targetBeforeCurrentLength = 0;
+        // if (DEBUG) {
+        //   System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+        // }
+        currentFrame = lastFrame;
+        currentFrame.rewind();
+      } else {
+        // Target is exactly the same as current term
+        assert term.length == target.length;
+        if (termExists) {
+          // if (DEBUG) {
+          //   System.out.println("  target is same as current; return true");
+          // }
+          return true;
+        } else {
+          // if (DEBUG) {
+          //   System.out.println("  target is same as current but term doesn't exist");
+          // }
+        }
+        //validIndexPrefix = currentFrame.depth;
+        //term.length = target.length;
+        //return termExists;
+      }
+
+    } else {
+
+      targetBeforeCurrentLength = -1;
+      arc = fr.index.getFirstArc(arcs[0]);
+
+      // Empty string prefix must have an output (block) in the index!
+      assert arc.isFinal();
+      assert arc.output != null;
+
+      // if (DEBUG) {
+      //   System.out.println("    no seek state; push root frame");
+      // }
+
+      output = arc.output;
+
+      currentFrame = staticFrame;
+
+      //term.length = 0;
+      targetUpto = 0;
+      currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
+    }
+
+    // if (DEBUG) {
+    //   System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+    // }
+
+    while (targetUpto < target.length) {
+
+      final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+      final FST.Arc<Pair<BytesRef,Long>> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+
+      if (nextArc == null) {
+
+        // Index is exhausted
+        // if (DEBUG) {
+        //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+        // }
+            
+        validIndexPrefix = currentFrame.prefix;
+        //validIndexPrefix = targetUpto;
+
+        currentFrame.scanToFloorFrame(target);
+
+        if (!currentFrame.hasTerms) {
+          termExists = false;
+          term.bytes[targetUpto] = (byte) targetLabel;
+          term.length = 1+targetUpto;
+          // if (DEBUG) {
+          //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+          // }
+          return false;
+        }
+
+        if ((Long.MAX_VALUE-output.output2) < minIDVersion) {
+          // The max version for all terms in this block is lower than the minVersion
+          return false;
+        }
+
+        currentFrame.loadBlock();
+
+        final SeekStatus result = currentFrame.scanToTerm(target, true);            
+        if (result == SeekStatus.FOUND) {
+          // if (DEBUG) {
+          //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
+          // }
+
+          currentFrame.decodeMetaData();
+          if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
+            // The max version for this term is lower than the minVersion
+            return false;
+          }
+          return true;
+        } else {
+          // if (DEBUG) {
+          //   System.out.println("  got " + result + "; return NOT_FOUND term=" + brToString(term));
+          // }
+          return false;
+        }
+      } else {
+        // Follow this arc
+        arc = nextArc;
+        term.bytes[targetUpto] = (byte) targetLabel;
+        // Aggregate output as we go:
+        assert arc.output != null;
+        if (arc.output != NO_OUTPUT) {
+          output = fstOutputs.add(output, arc.output);
+        }
+
+        // if (DEBUG) {
+        //   System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+        // }
+        targetUpto++;
+
+        if (arc.isFinal()) {
+          //if (DEBUG) System.out.println("    arc is final!");
+          currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
+          //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+        }
+      }
+    }
+
+    //validIndexPrefix = targetUpto;
+    validIndexPrefix = currentFrame.prefix;
+
+    currentFrame.scanToFloorFrame(target);
+
+    // Target term is entirely contained in the index:
+    if (!currentFrame.hasTerms) {
+      termExists = false;
+      term.length = targetUpto;
+      // if (DEBUG) {
+      //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+      // }
+      return false;
+    }
+
+    currentFrame.loadBlock();
+
+    final SeekStatus result = currentFrame.scanToTerm(target, true);            
+    if (result == SeekStatus.FOUND) {
+      // if (DEBUG) {
+      //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
+      // }
+      return true;
+    } else {
+      // if (DEBUG) {
+      //   System.out.println("  got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
+      // }
+
+      return false;
+    }
+  }
+
+  /**
+   * Seeks to {@code target}, or to the term that follows it when the block
+   * scan runs off the end.  Term versions are not consulted here.
+   *
+   * <p>Returns {@link SeekStatus#FOUND} right away when the enum is already
+   * positioned on {@code target} and that term exists.  Otherwise the status
+   * comes from scanning the block for {@code target}: if that scan reports
+   * {@link SeekStatus#END}, the enum advances with {@link #next()} and returns
+   * {@link SeekStatus#NOT_FOUND} when a following term exists or
+   * {@link SeekStatus#END} when it does not; any other scan status is returned
+   * unchanged.
+   *
+   * @throws IllegalStateException if the terms index was not loaded
+   */
+  @Override
+  public SeekStatus seekCeil(final BytesRef target) throws IOException {
+    if (fr.index == null) {
+      throw new IllegalStateException("terms index was not loaded");
+    }
+
+    // Make room for the target bytes plus one extra byte
+    if (term.bytes.length <= target.length) {
+      term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
+    }
+
+    assert clearEOF();
+
+    //if (DEBUG) {
+    //System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=  " + validIndexPrefix);
+    //printSeekState();
+    //}
+
+    FST.Arc<Pair<BytesRef,Long>> arc;
+    int targetUpto;
+    Pair<BytesRef,Long> output;
+
+    targetBeforeCurrentLength = currentFrame.ord;
+
+    if (currentFrame != staticFrame) {
+
+      // We are already seek'd; find the common
+      // prefix of new seek term vs current term and
+      // re-use the corresponding seek state.  For
+      // example, if app first seeks to foobar, then
+      // seeks to foobaz, we can re-use the seek state
+      // for the first 5 bytes.
+
+      //if (DEBUG) {
+      //System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
+      //}
+
+      arc = arcs[0];
+      assert arc.isFinal();
+      output = arc.output;
+      targetUpto = 0;
+
+      IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
+      assert validIndexPrefix <= term.length;
+
+      final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+      int cmp = 0;
+
+      // TODO: we should write our vLong backwards (MSB
+      // first) to get better sharing from the FST
+
+      // First compare up to valid seek frames:
+      while (targetUpto < targetLimit) {
+        cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+        //if (DEBUG) {
+        //System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
+        //}
+        if (cmp != 0) {
+          break;
+        }
+        arc = arcs[1+targetUpto];
+        assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+        // TODO: we could save the outputs in local
+        // byte[][] instead of making new objs every
+        // seek; but, often the FST doesn't have any
+        // shared bytes (but this could change if we
+        // reverse vLong byte order)
+        if (arc.output != NO_OUTPUT) {
+          output = fstOutputs.add(output, arc.output);
+        }
+        if (arc.isFinal()) {
+          lastFrame = stack[1+lastFrame.ord];
+        }
+        targetUpto++;
+      }
+
+
+      if (cmp == 0) {
+        final int targetUptoMid = targetUpto;
+        // Second compare the rest of the term, but
+        // don't save arc/output/frame:
+        final int targetLimit2 = Math.min(target.length, term.length);
+        while (targetUpto < targetLimit2) {
+          cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+          //if (DEBUG) {
+          //System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+          //}
+          if (cmp != 0) {
+            break;
+          }
+          targetUpto++;
+        }
+
+        // All shared bytes are equal, so the shorter term sorts first
+        if (cmp == 0) {
+          cmp = term.length - target.length;
+        }
+        // Resume the index walk from the end of the re-usable prefix
+        targetUpto = targetUptoMid;
+      }
+
+      if (cmp < 0) {
+        // Common case: target term is after current
+        // term, ie, app is seeking multiple terms
+        // in sorted order
+        //if (DEBUG) {
+        //System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); clear frame.scanned ord=" + lastFrame.ord);
+        //}
+        currentFrame = lastFrame;
+
+      } else if (cmp > 0) {
+        // Uncommon case: target term
+        // is before current term; this means we can
+        // keep the currentFrame but we must rewind it
+        // (so we scan from the start)
+        targetBeforeCurrentLength = 0;
+        //if (DEBUG) {
+        //System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+        //}
+        currentFrame = lastFrame;
+        currentFrame.rewind();
+      } else {
+        // Target is exactly the same as current term
+        assert term.length == target.length;
+        if (termExists) {
+          //if (DEBUG) {
+          //System.out.println("  target is same as current; return FOUND");
+          //}
+          return SeekStatus.FOUND;
+        } else {
+          //if (DEBUG) {
+          //System.out.println("  target is same as current but term doesn't exist");
+          //}
+        }
+      }
+
+    } else {
+
+      targetBeforeCurrentLength = -1;
+      arc = fr.index.getFirstArc(arcs[0]);
+
+      // Empty string prefix must have an output (block) in the index!
+      assert arc.isFinal();
+      assert arc.output != null;
+
+      //if (DEBUG) {
+      //System.out.println("    no seek state; push root frame");
+      //}
+
+      output = arc.output;
+
+      currentFrame = staticFrame;
+
+      //term.length = 0;
+      targetUpto = 0;
+      currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
+    }
+
+    //if (DEBUG) {
+    //System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+    //}
+
+    // Walk the index one target byte at a time, pushing a frame for every
+    // final arc, until the target or the index runs out:
+    while (targetUpto < target.length) {
+
+      final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+      final FST.Arc<Pair<BytesRef,Long>> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+
+      if (nextArc == null) {
+
+        // Index is exhausted
+        // if (DEBUG) {
+        //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+        // }
+
+        validIndexPrefix = currentFrame.prefix;
+        //validIndexPrefix = targetUpto;
+
+        currentFrame.scanToFloorFrame(target);
+
+        currentFrame.loadBlock();
+
+        final SeekStatus result = currentFrame.scanToTerm(target, false);
+        if (result == SeekStatus.END) {
+          // The block scan ran off the end: record target as the current
+          // (non-existent) term and let next() find what follows it
+          term.copyBytes(target);
+          termExists = false;
+
+          if (next() != null) {
+            //if (DEBUG) {
+            //System.out.println("  return NOT_FOUND term=" + brToString(term) + " " + term);
+            //}
+            return SeekStatus.NOT_FOUND;
+          } else {
+            //if (DEBUG) {
+            //System.out.println("  return END");
+            //}
+            return SeekStatus.END;
+          }
+        } else {
+          //if (DEBUG) {
+          //System.out.println("  return " + result + " term=" + brToString(term) + " " + term);
+          //}
+          return result;
+        }
+      } else {
+        // Follow this arc
+        term.bytes[targetUpto] = (byte) targetLabel;
+        arc = nextArc;
+        // Aggregate output as we go:
+        assert arc.output != null;
+        if (arc.output != NO_OUTPUT) {
+          output = fstOutputs.add(output, arc.output);
+        }
+
+        //if (DEBUG) {
+        //System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+        //}
+        targetUpto++;
+
+        if (arc.isFinal()) {
+          //if (DEBUG) System.out.println("    arc is final!");
+          currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
+          //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+        }
+      }
+    }
+
+    // Every byte of the target was matched by the index
+    //validIndexPrefix = targetUpto;
+    validIndexPrefix = currentFrame.prefix;
+
+    currentFrame.scanToFloorFrame(target);
+
+    currentFrame.loadBlock();
+
+    final SeekStatus result = currentFrame.scanToTerm(target, false);
+
+    if (result == SeekStatus.END) {
+      // Same END handling as on the index-exhausted path above
+      term.copyBytes(target);
+      termExists = false;
+      if (next() != null) {
+        //if (DEBUG) {
+        //System.out.println("  return NOT_FOUND term=" + term.utf8ToString() + " " + term);
+        //}
+        return SeekStatus.NOT_FOUND;
+      } else {
+        //if (DEBUG) {
+        //System.out.println("  return END");
+        //}
+        return SeekStatus.END;
+      }
+    } else {
+      return result;
+    }
+  }
+
+  // Debugging aid: prints one line per frame, from ord 0 up to and including
+  // currentFrame, to out.  When the terms index is loaded, each frame is also
+  // cross-checked against the index, and a RuntimeException is thrown if the
+  // seek state is found to be inconsistent.
+  @SuppressWarnings("unused")
+  private void printSeekState(PrintStream out) throws IOException {
+    if (currentFrame == staticFrame) {
+      out.println("  no prior seek");
+    } else {
+      out.println("  prior seek state:");
+      int ord = 0;
+      // Frames are reported as "seek" frames until one whose prefix equals
+      // validIndexPrefix has been printed; later frames are reported as "next"
+      boolean isSeekFrame = true;
+      while(true) {
+        IDVersionSegmentTermsEnumFrame f = getFrame(ord);
+        assert f != null;
+        final BytesRef prefix = new BytesRef(term.bytes, 0, f.prefix);
+        // nextEnt == -1 is printed without the "loaded" tag
+        if (f.nextEnt == -1) {
+          out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
+        } else {
+          out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
+        }
+        if (fr.index != null) {
+          assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
+          // A seek frame's arc label must equal the last byte of its prefix
+          if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.bytes[f.prefix-1]&0xFF)) {
+            out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.bytes[f.prefix-1]&0xFF));
+            throw new RuntimeException("seek state is broken");
+          }
+          // The frame's prefix must have an output in the index
+          Pair<BytesRef,Long> output = Util.get(fr.index, prefix);
+          if (output == null) {
+            out.println("      broken seek state: prefix is not final in index");
+            throw new RuntimeException("seek state is broken");
+          } else if (isSeekFrame && !f.isFloor) {
+            // For non-floor seek frames, the code stored in the index must
+            // match the code rebuilt from the frame's fp and flags
+            final ByteArrayDataInput reader = new ByteArrayDataInput(output.output1.bytes, output.output1.offset, output.output1.length);
+            final long codeOrig = reader.readVLong();
+            final long code = (f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) | (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
+            if (codeOrig != code) {
+              out.println("      broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
+              throw new RuntimeException("seek state is broken");
+            }
+          }
+        }
+        if (f == currentFrame) {
+          break;
+        }
+        if (f.prefix == validIndexPrefix) {
+          isSeekFrame = false;
+        }
+        ord++;
+      }
+    }
+  }
+
+  /* Decodes only the term bytes of the next term.  If the caller then asks for
+     metadata, i.e. docFreq or totalTermFreq, or pulls a DocsEnum or
+     DocsAndPositionsEnum, we then (lazily) decode all metadata up to the
+     current term. */
+  @Override
+  public BytesRef next() throws IOException {
+    // Returns the next term, or null once the root frame has been exhausted.
+
+    if (in == null) {
+      // Fresh TermsEnum; seek to first term:
+      final FST.Arc<Pair<BytesRef,Long>> arc;
+      if (fr.index != null) {
+        arc = fr.index.getFirstArc(arcs[0]);
+        // Empty string prefix must have an output in the index!
+        assert arc.isFinal();
+      } else {
+        arc = null;
+      }
+      // Push and load the root block (prefix length 0):
+      currentFrame = pushFrame(arc, fr.rootCode, 0);
+      currentFrame.loadBlock();
+    }
+
+    targetBeforeCurrentLength = currentFrame.ord;
+
+    assert !eof;
+    //if (DEBUG) {
+    //System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+    //printSeekState();
+    //}
+
+    if (currentFrame == staticFrame) {
+      // If seek was previously called and the term was
+      // cached, or seek(TermState) was called, usually
+      // caller is just going to pull a D/&PEnum or get
+      // docFreq, etc.  But, if they then call next(),
+      // this method catches up all internal state so next()
+      // works properly:
+      //if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " + term);
+      final boolean result = seekExact(term);
+      assert result;
+    }
+
+    // Pop finished blocks: while the current frame has no entries left,
+    // either advance to its next floor block or return to the parent frame.
+    while (currentFrame.nextEnt == currentFrame.entCount) {
+      if (!currentFrame.isLastInFloor) {
+        currentFrame.loadNextFloorBlock();
+      } else {
+        //if (DEBUG) System.out.println("  pop frame");
+        if (currentFrame.ord == 0) {
+          // The root frame is exhausted: there are no more terms.  Reset
+          // the enum state and signal the end of the enumeration.
+          //if (DEBUG) System.out.println("  return null");
+          assert setEOF();
+          term.length = 0;
+          validIndexPrefix = 0;
+          currentFrame.rewind();
+          termExists = false;
+          return null;
+        }
+        // Remember which sub-block we are leaving so the parent can be
+        // positioned just past it:
+        final long lastFP = currentFrame.fpOrig;
+        currentFrame = stack[currentFrame.ord-1];
+
+        if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
+          // We popped into a frame that's not loaded
+          // yet or not scan'd to the right entry
+          currentFrame.scanToFloorFrame(term);
+          currentFrame.loadBlock();
+          currentFrame.scanToSubBlock(lastFP);
+        }
+
+        // Note that the seek state (last seek) has been
+        // invalidated beyond this depth
+        validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
+        //if (DEBUG) {
+        //System.out.println("  reset validIndexPrefix=" + validIndexPrefix);
+        //}
+      }
+    }
+
+    // Advance within the current frame.  currentFrame.next() returns true
+    // when the entry just decoded is a sub-block, in which case we descend
+    // into it and keep going until we land on an actual term.
+    while(true) {
+      if (currentFrame.next()) {
+        // Push to new block:
+        //if (DEBUG) System.out.println("  push frame");
+        currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+        // This is a "next" frame -- even if it's
+        // floor'd we must pretend it isn't so we don't
+        // try to scan to the right floor frame:
+        currentFrame.isFloor = false;
+        //currentFrame.hasTerms = true;
+        currentFrame.loadBlock();
+      } else {
+        //if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
+        return term;
+      }
+    }
+  }
+
+  /** Returns the (shared, mutable) term this enum is currently positioned on; asserts that the enum is not at EOF. */
+  @Override
+  public BytesRef term() {
+    assert !eof;
+    return term;
+  }
+
+  /** Returns the docFreq of the current term, decoding any pending term metadata first. */
+  @Override
+  public int docFreq() throws IOException {
+    assert !eof;
+    // Term metadata is decoded lazily; catch up to the current term:
+    currentFrame.decodeMetaData();
+    final BlockTermState termMetadata = currentFrame.state;
+    return termMetadata.docFreq;
+  }
+
+  /** Returns the totalTermFreq of the current term, decoding any pending term metadata first. */
+  @Override
+  public long totalTermFreq() throws IOException {
+    assert !eof;
+    // Term metadata is decoded lazily; catch up to the current term:
+    currentFrame.decodeMetaData();
+    final BlockTermState termMetadata = currentFrame.state;
+    return termMetadata.totalTermFreq;
+  }
+
+  /**
+   * Returns a DocsEnum for the current term from the postings reader,
+   * decoding any pending term metadata first.
+   */
+  @Override
+  public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+    assert !eof;
+    // Term metadata is decoded lazily; the postings reader needs it populated:
+    currentFrame.decodeMetaData();
+    final BlockTermState termMetadata = currentFrame.state;
+    return fr.parent.postingsReader.docs(fr.fieldInfo, termMetadata, skipDocs, reuse, flags);
+  }
+
+  /**
+   * Returns a DocsAndPositionsEnum for the current term, or null when the
+   * field's index options do not include positions.
+   */
+  @Override
+  public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+    final boolean positionsIndexed =
+        fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    if (!positionsIndexed) {
+      // Positions were not indexed:
+      return null;
+    }
+
+    assert !eof;
+    // Term metadata is decoded lazily; the postings reader needs it populated:
+    currentFrame.decodeMetaData();
+    final BlockTermState termMetadata = currentFrame.state;
+    return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, termMetadata, skipDocs, reuse, flags);
+  }
+
+  /**
+   * Positions this enum on {@code target} using previously captured term state,
+   * without touching the terms dictionary.  The state is copied into the static
+   * frame; if next() is called afterwards it re-seeks to catch up.
+   */
+  @Override
+  public void seekExact(BytesRef target, TermState otherState) {
+    assert clearEOF();
+    if (target.compareTo(term) == 0 && termExists) {
+      // Already positioned on the target term: keep the current state.
+      return;
+    }
+
+    assert otherState != null && otherState instanceof BlockTermState;
+    currentFrame = staticFrame;
+    staticFrame.state.copyFrom(otherState);
+    term.copyBytes(target);
+    // The copied state already carries decoded metadata, so mark it as caught up:
+    staticFrame.metaDataUpto = staticFrame.getTermBlockOrd();
+    assert staticFrame.metaDataUpto > 0;
+    // Nothing of the previous seek path is valid any more:
+    validIndexPrefix = 0;
+  }
+      
+  /** Returns a private copy of the current term's state, decoding any pending term metadata first. */
+  @Override
+  public TermState termState() throws IOException {
+    assert !eof;
+    currentFrame.decodeMetaData();
+    // Clone so the caller's copy is not affected when this enum moves on:
+    return currentFrame.state.clone();
+  }
+
+  /** Seeking by ordinal is not supported by this enum; always throws UnsupportedOperationException. */
+  @Override
+  public void seekExact(long ord) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Term ordinals are not supported by this enum; always throws UnsupportedOperationException. */
+  @Override
+  public long ord() {
+    throw new UnsupportedOperationException();
+  }
+}
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnumFrame.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnumFrame.java
@ -0,0 +1,746 @@
+package org.apache.lucene.codecs.idversion;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+
+final class IDVersionSegmentTermsEnumFrame {
+  // Our index in stack[]:
+  final int ord;
+
+  // hasTerms flag for the block at fp; scanToFloorFrame overwrites it from
+  // the floor data and rewind() restores it from hasTermsOrig
+  boolean hasTerms;
+  boolean hasTermsOrig;
+  // When true, floorDataReader holds floor data that rewind() and
+  // scanToFloorFrame() consult
+  boolean isFloor;
+
+  // FST arc associated with this frame; may be null (NOTE(review): looks like
+  // frames pushed while iterating via next() pass null -- confirm in pushFrame)
+  FST.Arc<Pair<BytesRef,Long>> arc;
+
+  // File pointer where this block was loaded from
+  long fp;
+  // File pointer of this frame's original (first) block; rewind() resets fp to
+  // it, and floor block offsets decoded in scanToFloorFrame are relative to it
+  long fpOrig;
+  // File pointer just past the most recently loaded block (set at the end of
+  // loadBlock()); loadNextFloorBlock() continues from here
+  long fpEnd;
+
+  // Term suffix bytes of the loaded block, and a reader over them
+  byte[] suffixBytes = new byte[128];
+  final ByteArrayDataInput suffixesReader = new ByteArrayDataInput();
+
+  // Encoded term stats of the loaded block (docFreq and totalTermFreq delta,
+  // see decodeMetaData), and a reader over them
+  byte[] statBytes = new byte[64];
+  final ByteArrayDataInput statsReader = new ByteArrayDataInput();
+
+  // Private copy of the floor data (see setFloorData), and a reader over it
+  byte[] floorData = new byte[32];
+  final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
+
+  // Length of prefix shared by all terms in this block
+  int prefix;
+
+  // Number of entries (term or sub-block) in this block
+  int entCount;
+
+  // Which term we will next read, or -1 if the block
+  // isn't loaded yet
+  int nextEnt;
+
+  // True if this block is either not a floor block,
+  // or, it's the last sub-block of a floor block
+  boolean isLastInFloor;
+
+  // True if all entries are terms
+  boolean isLeafBlock;
+
+  // Absolute file pointer of the sub-block entry most recently decoded in
+  // this block, or -1 if none has been decoded since the block was loaded
+  long lastSubFP;
+
+  // Leading label of the next floor block; set to 256 once positioned on the
+  // last floor block
+  int nextFloorLabel;
+  // Number of floor block entries not yet consumed from floorDataReader
+  int numFollowFloorBlocks;
+
+  // Next term to decode metaData; we decode metaData
+  // lazily so that scanning to find the matching term is
+  // fast and only if you find a match and app wants the
+  // stats or docs/positions enums, will we decode the
+  // metaData
+  int metaDataUpto;
+
+  // Term state (stats and postings metadata) for the current term; created
+  // by the postings reader in the constructor
+  final BlockTermState state;
+
+  // metadata buffer, holding monotonic values
+  public long[] longs;
+  // metadata buffer, holding general values
+  public byte[] bytes;
+  // Reader over bytes; allocated on the first loadBlock() call
+  ByteArrayDataInput bytesReader;
+
+  // The terms enum that owns this frame
+  private final IDVersionSegmentTermsEnum ste;
+
+  /**
+   * Creates an (unloaded) frame for the given enum.
+   *
+   * @param ste the terms enum that owns this frame
+   * @param ord this frame's index in the enum's frame stack
+   */
+  public IDVersionSegmentTermsEnumFrame(IDVersionSegmentTermsEnum ste, int ord) throws IOException {
+    this.ord = ord;
+    this.ste = ste;
+
+    // Per-term state comes from the postings reader; totalTermFreq starts
+    // out as -1 until stats are decoded:
+    state = ste.fr.parent.postingsReader.newTermState();
+    state.totalTermFreq = -1;
+
+    // One slot per monotonic metadata value of this field:
+    longs = new long[ste.fr.longsSize];
+  }
+
+  /**
+   * Copies the floor data that follows the reader's current position in
+   * {@code source} into this frame's private buffer, then reads the floor
+   * header (number of following floor blocks and the next floor label).
+   *
+   * @param in reader positioned at the start of the floor data
+   * @param source the bytes (offset/length slice) that {@code in} is reading
+   */
+  public void setFloorData(ByteArrayDataInput in, BytesRef source) {
+    // The length computation treats getPosition() as an absolute index into
+    // source.bytes, so copy from that same index.  Adding source.offset again
+    // (as was done before) is only correct when source.offset == 0.
+    final int floorStart = in.getPosition();
+    final int numBytes = source.length - (floorStart - source.offset);
+    if (numBytes > floorData.length) {
+      floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
+    }
+    System.arraycopy(source.bytes, floorStart, floorData, 0, numBytes);
+    floorDataReader.reset(floorData, 0, numBytes);
+    numFollowFloorBlocks = floorDataReader.readVInt();
+    nextFloorLabel = floorDataReader.readByte() & 0xff;
+    //if (DEBUG) {
+    //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, floorStart, numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
+    //}
+  }
+
+  /**
+   * Returns how many terms of the loaded block have been consumed so far.
+   * In a leaf block every entry is a term, so the entry counter is used;
+   * otherwise the count is tracked separately in the term state.
+   */
+  public int getTermBlockOrd() {
+    if (isLeafBlock) {
+      return nextEnt;
+    }
+    return state.termBlockOrd;
+  }
+
+  /** Loads the block that starts where the previously loaded block ended. */
+  void loadNextFloorBlock() throws IOException {
+    assert arc == null || isFloor: "arc=" + arc + " isFloor=" + isFloor;
+    // Mark the frame as unloaded and continue right after the previous block:
+    nextEnt = -1;
+    fp = fpEnd;
+    loadBlock();
+  }
+
+  /* Does initial decode of next block of terms; this
+     doesn't actually decode the docFreq, totalTermFreq,
+     postings details (frq/prx offset, etc.) metadata;
+     it just loads them as byte[] blobs which are then
+     decoded on-demand if the metadata is ever requested
+     for any term in this block.  This enables terms-only
+     intensive consumers (eg certain MTQs, respelling) to
+     not pay the price of decoding metadata they won't
+     use. */
+  void loadBlock() throws IOException {
+
+    // Clone the IndexInput lazily, so that consumers
+    // that just pull a TermsEnum to
+    // seekExact(TermState) don't pay this cost:
+    ste.initIndexInput();
+
+    if (nextEnt != -1) {
+      // Already loaded
+      return;
+    }
+    //System.out.println("blc=" + blockLoadCount);
+
+    // Block header: entry count in the upper bits, "last in floor" flag in
+    // the low bit
+    ste.in.seek(fp);
+    int code = ste.in.readVInt();
+    entCount = code >>> 1;
+    assert entCount > 0;
+    isLastInFloor = (code & 1) != 0;
+    assert arc == null || (isLastInFloor || isFloor);
+
+    // TODO: if suffixes were stored in random-access
+    // array structure, then we could do binary search
+    // instead of linear scan to find target term; eg
+    // we could have simple array of offsets
+
+    // term suffixes: byte count in the upper bits, "leaf block" flag in the
+    // low bit
+    code = ste.in.readVInt();
+    isLeafBlock = (code & 1) != 0;
+    int numBytes = code >>> 1;
+    if (suffixBytes.length < numBytes) {
+      suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+    }
+    ste.in.readBytes(suffixBytes, 0, numBytes);
+    suffixesReader.reset(suffixBytes, 0, numBytes);
+
+    /*if (DEBUG) {
+      if (arc == null) {
+      System.out.println("    loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
+      } else {
+      System.out.println("    loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
+      }
+      }*/
+
+    // stats: loaded as an opaque blob; decoded lazily by decodeMetaData()
+    numBytes = ste.in.readVInt();
+    if (statBytes.length < numBytes) {
+      statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+    }
+    ste.in.readBytes(statBytes, 0, numBytes);
+    statsReader.reset(statBytes, 0, numBytes);
+    metaDataUpto = 0;
+
+    // Reset per-block iteration state: no term consumed, first entry next,
+    // no sub-block seen yet
+    state.termBlockOrd = 0;
+    nextEnt = 0;
+    lastSubFP = -1;
+
+    // TODO: we could skip this if !hasTerms; but
+    // that's rare so won't help much
+    // metadata: also loaded as an opaque blob; the buffer and its reader are
+    // allocated on first use
+    numBytes = ste.in.readVInt();
+    if (bytes == null) {
+      bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+      bytesReader = new ByteArrayDataInput();
+    } else if (bytes.length < numBytes) {
+      bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+    }
+    ste.in.readBytes(bytes, 0, numBytes);
+    bytesReader.reset(bytes, 0, numBytes);
+
+
+    // Sub-blocks of a single floor block are always
+    // written one after another, so the end of this block
+    // is where loadNextFloorBlock() resumes reading:
+    fpEnd = ste.in.getFilePointer();
+    // if (DEBUG) {
+    //   System.out.println("      fpEnd=" + fpEnd);
+    // }
+  }
+
+  /**
+   * Resets this frame to its original block and marks it as not loaded, so
+   * that the next loadBlock() call re-reads the block from disk.
+   */
+  void rewind() {
+
+    // Force reload from the original block:
+    nextEnt = -1;
+    fp = fpOrig;
+    hasTerms = hasTermsOrig;
+
+    if (!isFloor) {
+      return;
+    }
+
+    // Restart iteration over the floor data from its header:
+    floorDataReader.rewind();
+    numFollowFloorBlocks = floorDataReader.readVInt();
+    nextFloorLabel = floorDataReader.readByte() & 0xff;
+
+    // NOTE: a variant that kept the block loaded and only rewound the
+    // suffix/stats readers used to live here as commented-out code; it was
+    // disabled in favor of always forcing a reload.
+  }
+
+  /**
+   * Decodes the next entry of the loaded block.
+   *
+   * @return true if the entry is a sub-block, false if it is a term
+   */
+  public boolean next() {
+    if (isLeafBlock) {
+      return nextLeaf();
+    }
+    return nextNonLeaf();
+  }
+
+  // Decodes the next entry of a leaf block into ste.term.  Every entry of a
+  // leaf block is a term, so this always returns false (false == "not a
+  // sub-block", matching nextNonLeaf).
+  public boolean nextLeaf() {
+    //if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+    assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+    nextEnt++;
+
+    // Entry layout in a leaf block: vInt suffix length, then the suffix bytes
+    suffix = suffixesReader.readVInt();
+    startBytePos = suffixesReader.getPosition();
+
+    // Append the suffix to the shared prefix already sitting in ste.term:
+    final int termLength = prefix + suffix;
+    ste.term.length = termLength;
+    if (termLength > ste.term.bytes.length) {
+      ste.term.grow(termLength);
+    }
+    suffixesReader.readBytes(ste.term.bytes, prefix, suffix);
+
+    // A normal term
+    ste.termExists = true;
+    return false;
+  }
+
+  /**
+   * Decodes the next entry of a non-leaf block into ste.term.
+   *
+   * @return true if the entry is a sub-block (lastSubFP then holds its
+   *         absolute file pointer), false if it is a term
+   */
+  public boolean nextNonLeaf() {
+    //if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+    assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+    nextEnt++;
+
+    // Entry header: suffix length in the upper bits, "is sub-block" flag in
+    // the low bit
+    final int code = suffixesReader.readVInt();
+    final boolean isSubBlock = (code & 1) != 0;
+    suffix = code >>> 1;
+    startBytePos = suffixesReader.getPosition();
+
+    // Append the suffix to the shared prefix already sitting in ste.term:
+    final int termLength = prefix + suffix;
+    ste.term.length = termLength;
+    if (termLength > ste.term.bytes.length) {
+      ste.term.grow(termLength);
+    }
+    suffixesReader.readBytes(ste.term.bytes, prefix, suffix);
+
+    ste.termExists = !isSubBlock;
+    if (isSubBlock) {
+      // A sub-block; make sub-FP absolute:
+      subCode = suffixesReader.readVLong();
+      lastSubFP = fp - subCode;
+      //if (DEBUG) {
+      //System.out.println("    lastSubFP=" + lastSubFP);
+      //}
+      return true;
+    }
+
+    // A normal term
+    subCode = 0;
+    state.termBlockOrd++;
+    return false;
+  }
+        
+  // TODO: make this array'd so we can do bin search?
+  // likely not worth it?  need to measure how many
+  // floor blocks we "typically" get
+  // Walks the floor data to find the floor block whose label range contains
+  // the target's first byte after this frame's prefix, and points fp at it.
+  // Does nothing if this frame is not a floor block or the target is not
+  // longer than the prefix.  If the chosen block differs from the one
+  // currently at fp, the frame is marked unloaded (nextEnt = -1) so a
+  // following loadBlock() reads it.
+  public void scanToFloorFrame(BytesRef target) {
+
+    if (!isFloor || target.length <= prefix) {
+      // if (DEBUG) {
+      //   System.out.println("    scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
+      // }
+      return;
+    }
+
+    // First byte of the target beyond the shared prefix, as an unsigned value:
+    final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
+
+    // if (DEBUG) {
+    //   System.out.println("    scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+    // }
+
+    if (targetLabel < nextFloorLabel) {
+      // if (DEBUG) {
+      //   System.out.println("      already on correct block");
+      // }
+      return;
+    }
+
+    assert numFollowFloorBlocks != 0;
+
+    long newFP = fpOrig;
+    while (true) {
+      // Each floor entry is a vLong: file pointer delta from fpOrig in the
+      // upper bits, hasTerms flag in the low bit
+      final long code = floorDataReader.readVLong();
+      newFP = fpOrig + (code >>> 1);
+      hasTerms = (code & 1) != 0;
+      // if (DEBUG) {
+      //   System.out.println("      label=" + toHex(nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+      // }
+            
+      isLastInFloor = numFollowFloorBlocks == 1;
+      numFollowFloorBlocks--;
+
+      if (isLastInFloor) {
+        // No further floor block: use a label above any byte value so every
+        // target label compares as belonging to this block
+        nextFloorLabel = 256;
+        // if (DEBUG) {
+        //   System.out.println("        stop!  last block nextFloorLabel=" + toHex(nextFloorLabel));
+        // }
+        break;
+      } else {
+        // Every entry except the last is followed by the leading label of
+        // the next floor block
+        nextFloorLabel = floorDataReader.readByte() & 0xff;
+        if (targetLabel < nextFloorLabel) {
+          // if (DEBUG) {
+          //   System.out.println("        stop!  nextFloorLabel=" + toHex(nextFloorLabel));
+          // }
+          break;
+        }
+      }
+    }
+
+    if (newFP != fp) {
+      // Force re-load of the block:
+      // if (DEBUG) {
+      //   System.out.println("      force switch to fp=" + newFP + " oldFP=" + fp);
+      // }
+      nextEnt = -1;
+      fp = newFP;
+    } else {
+      // if (DEBUG) {
+      //   System.out.println("      stay on same fp=" + newFP);
+      // }
+    }
+  }
+    
+  // Lazily decodes stats and postings metadata for every term of the loaded
+  // block from metaDataUpto up to the current term (getTermBlockOrd()),
+  // leaving the current term's values in state.
+  public void decodeMetaData() throws IOException {
+
+    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
+
+    // lazily catch up on metadata decode:
+    final int limit = getTermBlockOrd();
+    // Only the first term decoded in a block is flagged as absolute; later
+    // terms are passed to decodeTerm with absolute == false
+    boolean absolute = metaDataUpto == 0;
+    assert limit > 0;
+
+    // TODO: better API would be "jump straight to term=N"???
+    while (metaDataUpto < limit) {
+
+      // TODO: we could make "tiers" of metadata, ie,
+      // decode docFreq/totalTF but don't decode postings
+      // metadata; this way caller could get
+      // docFreq/totalTF w/o paying decode cost for
+      // postings
+
+      // TODO: if docFreq were bulk decoded we could
+      // just skipN here:
+
+      // stats
+      state.docFreq = statsReader.readVInt();
+      //if (DEBUG) System.out.println("    dF=" + state.docFreq);
+      if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+        // totalTermFreq is stored as a delta on top of docFreq
+        state.totalTermFreq = state.docFreq + statsReader.readVLong();
+        //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
+      }
+      // metadata: first the fixed number of long values, then whatever the
+      // postings reader decodes from the remaining bytes
+      for (int i = 0; i < ste.fr.longsSize; i++) {
+        longs[i] = bytesReader.readVLong();
+      }
+      ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
+
+      metaDataUpto++;
+      absolute = false;
+    }
+    state.termBlockOrd = metaDataUpto;
+  }
+
+  // Used only by assert: checks that the first `prefix` bytes of target
+  // equal the first `prefix` bytes of the enum's current term.
+  private boolean prefixMatches(BytesRef target) {
+    int bytePos = 0;
+    while (bytePos < prefix) {
+      if (target.bytes[target.offset + bytePos] != ste.term.bytes[bytePos]) {
+        return false;
+      }
+      bytePos++;
+    }
+    return true;
+  }
+
+  // Scans to sub-block that has this target fp; only
+  // called by next(); NOTE: does not set
+  // startBytePos/suffix as a side effect
+  public void scanToSubBlock(long subFP) {
+    // Advances through this (non-leaf) block's entries until the sub-block
+    // entry pointing at subFP has been consumed, counting the terms skipped
+    // along the way in state.termBlockOrd.
+    assert !isLeafBlock;
+    //if (DEBUG) System.out.println("  scanToSubBlock fp=" + fp + " subFP=" + subFP + " entCount=" + entCount + " lastSubFP=" + lastSubFP);
+    //assert nextEnt == 0;
+    if (lastSubFP == subFP) {
+      //if (DEBUG) System.out.println("    already positioned");
+      return;
+    }
+    assert subFP < fp : "fp=" + fp + " subFP=" + subFP;
+    // Sub-block pointers are stored as the distance back from this block's fp:
+    final long targetSubCode = fp - subFP;
+    //if (DEBUG) System.out.println("    targetSubCode=" + targetSubCode);
+    while(true) {
+      assert nextEnt < entCount;
+      nextEnt++;
+      final int code = suffixesReader.readVInt();
+      // Skip the suffix bytes; only the entry kind and sub-block code matter here
+      suffixesReader.skipBytes(isLeafBlock ? code : code >>> 1);
+      //if (DEBUG) System.out.println("    " + nextEnt + " (of " + entCount + ") ent isSubBlock=" + ((code&1)==1));
+      if ((code & 1) != 0) {
+        // Sub-block entry: compare its code against the one we are looking for
+        final long subCode = suffixesReader.readVLong();
+        //if (DEBUG) System.out.println("      subCode=" + subCode);
+        if (targetSubCode == subCode) {
+          //if (DEBUG) System.out.println("        match!");
+          lastSubFP = subFP;
+          return;
+        }
+      } else {
+        // Term entry: keep the term counter in step with the scan
+        state.termBlockOrd++;
+      }
+    }
+  }
+
+  // NOTE: sets startBytePos/suffix as a side effect
+  public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException {
+    // Dispatch on the block kind; leaf blocks contain only terms and use a
+    // simpler entry encoding.
+    if (isLeafBlock) {
+      return scanToTermLeaf(target, exactOnly);
+    }
+    return scanToTermNonLeaf(target, exactOnly);
+  }
+
+  // Position in suffixBytes where the current entry's suffix starts
+  private int startBytePos;
+  // Length in bytes of the current entry's suffix
+  private int suffix;
+  // Sub-block code of the current entry (distance back from fp to the
+  // sub-block), or 0 when the current entry is a term
+  private long subCode;
+
+  // Target's prefix matches this block's prefix; we
+  // scan the entries to check whether a suffix matches.
+  /**
+   * Scans forward through this leaf block (every entry is a term) looking
+   * for {@code target}, starting at the current entry.  Sets
+   * startBytePos/suffix as a side effect.
+   *
+   * @param target the term to look for; its first {@code prefix} bytes must
+   *        match this block's prefix
+   * @param exactOnly when true, ste.term is also filled with the last
+   *        scanned entry if the block is exhausted without reaching the target
+   * @return FOUND if an entry equals the target; NOT_FOUND if an entry
+   *         greater than the target was reached (ste.term is set to that
+   *         entry); END if the block ran out of entries first
+   */
+  public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
+
+    // if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
+
+    assert nextEnt != -1;
+
+    // Every entry of a leaf block is a term:
+    ste.termExists = true;
+    subCode = 0;
+
+    if (nextEnt == entCount) {
+      if (exactOnly) {
+        fillTerm();
+      }
+      return SeekStatus.END;
+    }
+
+    assert prefixMatches(target);
+
+    // Loop over each entry (term) in this block:
+    //nextTerm: while(nextEnt < entCount) {
+    nextTerm: while (true) {
+      nextEnt++;
+
+      suffix = suffixesReader.readVInt();
+
+      // if (DEBUG) {
+      //   BytesRef suffixBytesRef = new BytesRef();
+      //   suffixBytesRef.bytes = suffixBytes;
+      //   suffixBytesRef.offset = suffixesReader.getPosition();
+      //   suffixBytesRef.length = suffix;
+      //   System.out.println("      cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+      // }
+
+      final int termLen = prefix + suffix;
+      startBytePos = suffixesReader.getPosition();
+      suffixesReader.skipBytes(suffix);
+
+      final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
+      int targetPos = target.offset + prefix;
+
+      // Loop over bytes in the suffix, comparing to
+      // the target
+      int bytePos = startBytePos;
+      while(true) {
+        final int cmp;
+        final boolean stop;
+        if (targetPos < targetLimit) {
+          cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+          stop = false;
+        } else {
+          // All shared bytes are equal; the shorter of the two sorts first
+          assert targetPos == targetLimit;
+          cmp = termLen - target.length;
+          stop = true;
+        }
+
+        if (cmp < 0) {
+          // Current entry is still before the target;
+          // keep scanning
+
+          if (nextEnt == entCount) {
+            if (exactOnly) {
+              fillTerm();
+            }
+            // We are done scanning this block
+            break nextTerm;
+          } else {
+            continue nextTerm;
+          }
+        } else if (cmp > 0) {
+
+          // Done!  Current entry is after target --
+          // return NOT_FOUND:
+          fillTerm();
+
+          // No sub-block handling is needed here: a leaf block has no
+          // sub-block entries, and ste.termExists stays true throughout
+          // this method.
+
+          //if (DEBUG) System.out.println("        not found");
+          return SeekStatus.NOT_FOUND;
+        } else if (stop) {
+          // Exact match!
+
+          // This cannot be a sub-block because a leaf
+          // block only contains terms:
+
+          assert ste.termExists;
+          fillTerm();
+          //if (DEBUG) System.out.println("        found!");
+          return SeekStatus.FOUND;
+        }
+      }
+    }
+
+    // It is possible (and OK) that terms index pointed us
+    // at this block, but, we scanned the entire block and
+    // did not find the term to position to.  This happens
+    // when the target is after the last term in the block
+    // (but, before the next term in the index).  EG
+    // target could be foozzz, and terms index pointed us
+    // to the foo* block, but the last term in this block
+    // was fooz (and, eg, first term in the next block will
+    // be fop).
+    //if (DEBUG) System.out.println("      block end");
+    if (exactOnly) {
+      fillTerm();
+    }
+
+    // TODO: not consistent that in the
+    // not-exact case we don't next() into the next
+    // frame here
+    return SeekStatus.END;
+  }
+
+  // Target's prefix matches this block's prefix; we
+  // scan the entries to check whether a suffix matches.
+  public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
+    // Scans forward through this non-leaf block (entries are terms or
+    // sub-blocks) looking for target.  Returns FOUND on an exact match,
+    // NOT_FOUND once an entry greater than the target is reached, and END if
+    // the block runs out of entries first.  Sets startBytePos/suffix, and for
+    // sub-block entries subCode/lastSubFP, as a side effect.
+
+    //if (DEBUG) System.out.println("    scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
+
+    assert nextEnt != -1;
+
+    if (nextEnt == entCount) {
+      if (exactOnly) {
+        fillTerm();
+        // subCode is 0 exactly when the last decoded entry was a term
+        ste.termExists = subCode == 0;
+      }
+      return SeekStatus.END;
+    }
+
+    assert prefixMatches(target);
+
+    // Loop over each entry (term or sub-block) in this block:
+    //nextTerm: while(nextEnt < entCount) {
+    nextTerm: while (true) {
+      nextEnt++;
+
+      // Entry header: suffix length in the upper bits, "is sub-block" flag
+      // in the low bit
+      final int code = suffixesReader.readVInt();
+      suffix = code >>> 1;
+      // if (DEBUG) {
+      //   BytesRef suffixBytesRef = new BytesRef();
+      //   suffixBytesRef.bytes = suffixBytes;
+      //   suffixBytesRef.offset = suffixesReader.getPosition();
+      //   suffixBytesRef.length = suffix;
+      //   System.out.println("      cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+      // }
+
+      ste.termExists = (code & 1) == 0;
+      final int termLen = prefix + suffix;
+      startBytePos = suffixesReader.getPosition();
+      suffixesReader.skipBytes(suffix);
+      if (ste.termExists) {
+        state.termBlockOrd++;
+        subCode = 0;
+      } else {
+        // Sub-block: its pointer is stored as the distance back from fp
+        subCode = suffixesReader.readVLong();
+        lastSubFP = fp - subCode;
+      }
+
+      final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
+      int targetPos = target.offset + prefix;
+
+      // Loop over bytes in the suffix, comparing to
+      // the target
+      int bytePos = startBytePos;
+      while(true) {
+        final int cmp;
+        final boolean stop;
+        if (targetPos < targetLimit) {
+          cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+          stop = false;
+        } else {
+          // All shared bytes are equal; the shorter of the two sorts first
+          assert targetPos == targetLimit;
+          cmp = termLen - target.length;
+          stop = true;
+        }
+
+        if (cmp < 0) {
+          // Current entry is still before the target;
+          // keep scanning
+
+          if (nextEnt == entCount) {
+            if (exactOnly) {
+              fillTerm();
+              //termExists = true;
+            }
+            // We are done scanning this block
+            break nextTerm;
+          } else {
+            continue nextTerm;
+          }
+        } else if (cmp > 0) {
+
+          // Done!  Current entry is after target --
+          // return NOT_FOUND:
+          fillTerm();
+
+          if (!exactOnly && !ste.termExists) {
+            // We are on a sub-block, and caller wants
+            // us to position to the next term after
+            // the target, so we must recurse into the
+            // sub-frame(s):
+            ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen);
+            ste.currentFrame.loadBlock();
+            // Keep descending while the first entry of each new frame is
+            // itself a sub-block:
+            while (ste.currentFrame.next()) {
+              ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length);
+              ste.currentFrame.loadBlock();
+            }
+          }
+                
+          //if (DEBUG) System.out.println("        not found");
+          return SeekStatus.NOT_FOUND;
+        } else if (stop) {
+          // Exact match!
+
+          // This cannot be a sub-block because we
+          // would have followed the index to this
+          // sub-block from the start:
+
+          assert ste.termExists;
+          fillTerm();
+          //if (DEBUG) System.out.println("        found!");
+          return SeekStatus.FOUND;
+        }
+      }
+    }
+
+    // It is possible (and OK) that terms index pointed us
+    // at this block, but, we scanned the entire block and
+    // did not find the term to position to.  This happens
+    // when the target is after the last term in the block
+    // (but, before the next term in the index).  EG
+    // target could be foozzz, and terms index pointed us
+    // to the foo* block, but the last term in this block
+    // was fooz (and, eg, first term in the next block will
+    // be fop).
+    //if (DEBUG) System.out.println("      block end");
+    if (exactOnly) {
+      fillTerm();
+    }
+
+    // TODO: not consistent that in the
+    // not-exact case we don't next() into the next
+    // frame here
+    return SeekStatus.END;
+  }
+
+  // Copies the current entry's suffix (startBytePos/suffix within
+  // suffixBytes) into ste.term right after the shared prefix, growing the
+  // term buffer if needed.
+  private void fillTerm() {
+    final int termLength = prefix + suffix;
+    ste.term.length = termLength;
+    if (termLength > ste.term.bytes.length) {
+      ste.term.grow(termLength);
+    }
+    System.arraycopy(suffixBytes, startBytePos, ste.term.bytes, prefix, suffix);
+  }
+}
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionTermState.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionTermState.java
@ -0,0 +1,41 @@
+package org.apache.lucene.codecs.idversion;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.TermState;
+
+/**
+ * A {@link BlockTermState} that additionally carries an {@code idVersion} and a
+ * {@code docID} for the term.
+ */
+final class IDVersionTermState extends BlockTermState {
+  /** Version value held for this term. */
+  long idVersion;
+
+  /** Document ID held for this term. */
+  int docID;
+
+  /** Returns a new instance populated from this one via {@link #copyFrom}. */
+  @Override
+  public IDVersionTermState clone() {
+    final IDVersionTermState copy = new IDVersionTermState();
+    copy.copyFrom(this);
+    return copy;
+  }
+
+  /**
+   * Copies the base-class state first, then the two fields declared here.
+   *
+   * @param _other state to copy from; cast to {@link IDVersionTermState}
+   */
+  @Override
+  public void copyFrom(TermState _other) {
+    super.copyFrom(_other);
+    final IDVersionTermState source = (IDVersionTermState) _other;
+    this.docID = source.docID;
+    this.idVersion = source.idVersion;
+  }
+}
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
@ -0,0 +1,319 @@
+package org.apache.lucene.codecs.idversion;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.TreeMap;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Outputs;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.Util;
+
+/** A block-based terms index and dictionary that assigns
+ *  terms to variable length blocks according to how they
+ *  share prefixes.  The terms index is a prefix trie
+ *  whose leaves are term blocks.  The advantage of this
+ *  approach is that seekExact is often able to
+ *  determine a term cannot exist without doing any IO, and
+ *  intersection with Automata is very fast.  Note that this
+ *  terms dictionary has its own fixed terms index (ie, it
+ *  does not support a pluggable terms index
+ *  implementation).
+ *
+ *  <p><b>NOTE</b>: this terms dictionary supports
+ *  min/maxItemsPerBlock during indexing to control how
+ *  much memory the terms index uses.</p>
+ *
+ *  <p>The data structure used by this implementation is very
+ *  similar to a burst trie
+ *  (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
+ *  but with added logic to break up too-large blocks of all
+ *  terms sharing a given prefix into smaller ones.</p>
+ *
+ *  <p>Use {@link org.apache.lucene.index.CheckIndex} with the <code>-verbose</code>
+ *  option to see summary statistics on the blocks in the
+ *  dictionary.</p>
+ *
+ *  See {@link VersionBlockTreeTermsWriter}.
+ *
+ * @lucene.experimental
+ */
+
+final class VersionBlockTreeTermsReader extends FieldsProducer {
+
+  // Open input to the main terms dict file (_X.tiv)
+  final IndexInput in;
+
+  //private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+  // Reads the terms dict entries, to gather state to
+  // produce DocsEnum on demand
+  final PostingsReaderBase postingsReader;
+
+  // Per-field readers, keyed by field name
+  private final TreeMap<String,VersionFieldReader> fields = new TreeMap<>();
+
+  /** File offset where the directory starts in the terms file. */
+  private long dirOffset;
+
+  /** File offset where the directory starts in the index file. */
+  private long indexDirOffset;
+
+  final String segment;
+
+  // File format version, as returned by the terms file header check
+  private final int version;
+
+  /**
+   * Sole constructor.  Opens the terms and terms-index files for the segment,
+   * checks that both headers carry the same format version, lets the postings
+   * reader initialize itself, and then reads the per-field directory, creating
+   * one {@link VersionFieldReader} per field.  The index input is closed before
+   * returning; on failure both inputs are closed while suppressing secondary
+   * exceptions.
+   *
+   * @throws CorruptIndexException if the two files disagree on version, or the
+   *         per-field statistics are inconsistent, or a field is listed twice
+   * @throws IOException if reading either file fails
+   */
+  public VersionBlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
+                                     PostingsReaderBase postingsReader, IOContext ioContext,
+                                     String segmentSuffix)
+    throws IOException {
+
+    this.postingsReader = postingsReader;
+
+    this.segment = info.name;
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VersionBlockTreeTermsWriter.TERMS_EXTENSION),
+                       ioContext);
+
+    boolean success = false;
+    IndexInput indexIn = null;
+
+    try {
+      version = readHeader(in);
+      indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VersionBlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
+                                ioContext);
+      int indexVersion = readIndexHeader(indexIn);
+      if (indexVersion != version) {
+        throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
+      }
+
+      // verify
+      if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(indexIn);
+      }
+
+      // Have PostingsReader init itself
+      postingsReader.init(in);
+
+      // Read per-field details
+      seekDir(in, dirOffset);
+      seekDir(indexIn, indexDirOffset);
+
+      final int numFields = in.readVInt();
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+      }
+
+      PairOutputs<BytesRef,Long> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs();
+
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readVInt();
+        final long numTerms = in.readVLong();
+        assert numTerms >= 0;
+        final int numBytes = in.readVInt();
+        final BytesRef code = new BytesRef(new byte[numBytes]);
+        in.readBytes(code.bytes, 0, numBytes);
+        code.length = numBytes;
+        // Long half of the root code pair.  This local must NOT be named
+        // "version": doing so shadows the file format version field, and the
+        // format checks below would then be evaluated against this per-field
+        // value, skipping or mis-reading longsSize and min/max terms.
+        final long rootVersion = in.readVLong();
+        final Pair<BytesRef,Long> rootCode = fstOutputs.newPair(code, rootVersion);
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        assert fieldInfo != null: "field=" + field;
+        final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
+        final long sumDocFreq = in.readVLong();
+        final int docCount = in.readVInt();
+        // Format-version gated: longsSize is only present from VERSION_META_ARRAY on
+        final int longsSize = version >= VersionBlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
+
+        // Format-version gated: min/max terms are only present from VERSION_MIN_MAX_TERMS on
+        BytesRef minTerm, maxTerm;
+        if (version >= VersionBlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
+          minTerm = readBytesRef(in);
+          maxTerm = readBytesRef(in);
+        } else {
+          minTerm = maxTerm = null;
+        }
+        if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
+          throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+        }
+        if (sumDocFreq < docCount) {  // #postings must be >= #docs with field
+          throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
+        }
+        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+          throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
+        }
+        final long indexStartFP = indexIn.readVLong();
+        VersionFieldReader previous = fields.put(fieldInfo.name,
+                                                 new VersionFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
+                                                                        indexStartFP, longsSize, indexIn, minTerm, maxTerm));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+        }
+      }
+      indexIn.close();
+
+      success = true;
+    } finally {
+      if (!success) {
+        // this.close() will close in:
+        IOUtils.closeWhileHandlingException(indexIn, this);
+      }
+    }
+  }
+
+  /** Reads a vInt length followed by that many bytes into a new {@link BytesRef}. */
+  private static BytesRef readBytesRef(IndexInput in) throws IOException {
+    BytesRef bytes = new BytesRef();
+    bytes.length = in.readVInt();
+    bytes.bytes = new byte[bytes.length];
+    in.readBytes(bytes.bytes, 0, bytes.length);
+    return bytes;
+  }
+
+  /**
+   * Reads terms file header.  For formats older than VERSION_APPEND_ONLY the
+   * directory offset follows the header and is stored in {@code dirOffset}.
+   *
+   * @return the format version found in the header
+   */
+  private int readHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_CODEC_NAME,
+                          VersionBlockTreeTermsWriter.VERSION_START,
+                          VersionBlockTreeTermsWriter.VERSION_CURRENT);
+    if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
+  }
+
+  /**
+   * Reads index file header.  For formats older than VERSION_APPEND_ONLY the
+   * directory offset follows the header and is stored in {@code indexDirOffset}.
+   *
+   * @return the format version found in the header
+   */
+  private int readIndexHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
+                          VersionBlockTreeTermsWriter.VERSION_START,
+                          VersionBlockTreeTermsWriter.VERSION_CURRENT);
+    if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
+      indexDirOffset = input.readLong();
+    }
+    return version;
+  }
+
+  /**
+   * Seek {@code input} to the directory offset.  The passed-in {@code dirOffset}
+   * is only used as-is for formats older than VERSION_APPEND_ONLY; for newer
+   * formats it is replaced by the long stored at the end of the file (just
+   * before the footer when the format has checksums).
+   */
+  private void seekDir(IndexInput input, long dirOffset)
+      throws IOException {
+    if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
+      input.seek(input.length() - CodecUtil.footerLength() - 8);
+      dirOffset = input.readLong();
+    } else if (version >= VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
+      input.seek(input.length() - 8);
+      dirOffset = input.readLong();
+    }
+    input.seek(dirOffset);
+  }
+
+  // for debugging
+  // private static String toHex(int v) {
+  //   return "0x" + Integer.toHexString(v);
+  // }
+
+  /** Closes the terms input and the postings reader, then drops all per-field readers. */
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(in, postingsReader);
+    } finally {
+      // Clear so refs to terms index is GCable even if
+      // app hangs onto us:
+      fields.clear();
+    }
+  }
+
+  /** Returns a read-only iterator over the field names, in sorted order. */
+  @Override
+  public Iterator<String> iterator() {
+    return Collections.unmodifiableSet(fields.keySet()).iterator();
+  }
+
+  /** Returns the reader for {@code field}, or null if this segment has no such field. */
+  @Override
+  public Terms terms(String field) throws IOException {
+    assert field != null;
+    return fields.get(field);
+  }
+
+  /** Returns the number of fields loaded from the directory. */
+  @Override
+  public int size() {
+    return fields.size();
+  }
+
+  // for debugging
+  String brToString(BytesRef b) {
+    if (b == null) {
+      return "null";
+    } else {
+      try {
+        return b.utf8ToString() + " " + b;
+      } catch (Throwable t) {
+        // If BytesRef isn't actually UTF8, or it's eg a
+        // prefix of UTF8 that ends mid-unicode-char, we
+        // fallback to hex:
+        return b.toString();
+      }
+    }
+  }
+
+  /** Sums the RAM usage reported by the postings reader (if any) and every field reader. */
+  @Override
+  public long ramBytesUsed() {
+    long sizeInBytes = ((postingsReader!=null) ? postingsReader.ramBytesUsed() : 0);
+    for(VersionFieldReader reader : fields.values()) {
+      sizeInBytes += reader.ramBytesUsed();
+    }
+    return sizeInBytes;
+  }
+
+  /** Verifies checksums of the terms file and the postings; a no-op for formats without checksums. */
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
+      // term dictionary
+      CodecUtil.checksumEntireFile(in);
+
+      // postings
+      postingsReader.checkIntegrity();
+    }
+  }
+}
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/VersionFieldReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/VersionFieldReader.java
@ -0,0 +1,163 @@
+package org.apache.lucene.codecs.idversion;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+
+/**
+ * {@link Terms} implementation for a single field, created by
+ * {@link VersionBlockTreeTermsReader} from the per-field directory entry.
+ */
+final class VersionFieldReader extends Terms {
+  final long numTerms;
+  final FieldInfo fieldInfo;
+  final long sumTotalTermFreq;
+  final long sumDocFreq;
+  final int docCount;
+  final long indexStartFP;
+  final long rootBlockFP;
+  final Pair<BytesRef,Long> rootCode;
+  final BytesRef minTerm;
+  final BytesRef maxTerm;
+  final int longsSize;
+  final VersionBlockTreeTermsReader parent;
+
+  // Terms index for this field; null when no index input was supplied
+  final FST<Pair<BytesRef,Long>> index;
+  //private boolean DEBUG;
+
+  /**
+   * Stores the per-field statistics, decodes the root block file pointer from
+   * {@code rootCode}, and, when {@code indexIn} is non-null, loads the field's
+   * FST from a clone of {@code indexIn} positioned at {@code indexStartFP}.
+   */
+  VersionFieldReader(VersionBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Pair<BytesRef,Long> rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
+              long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
+    assert numTerms > 0;
+    this.parent = parent;
+    this.fieldInfo = fieldInfo;
+    //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
+    this.numTerms = numTerms;
+    this.docCount = docCount;
+    this.sumDocFreq = sumDocFreq;
+    this.sumTotalTermFreq = sumTotalTermFreq;
+    this.longsSize = longsSize;
+    this.indexStartFP = indexStartFP;
+    this.rootCode = rootCode;
+    this.minTerm = minTerm;
+    this.maxTerm = maxTerm;
+    // if (DEBUG) {
+    //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
+    // }
+
+    // The first output of the root code starts with a vLong whose low
+    // OUTPUT_FLAGS_NUM_BITS bits are flags; the rest is the file pointer:
+    final BytesRef rootBytes = rootCode.output1;
+    final ByteArrayDataInput rootInput = new ByteArrayDataInput(rootBytes.bytes, rootBytes.offset, rootBytes.length);
+    rootBlockFP = rootInput.readVLong() >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+
+    if (indexIn == null) {
+      index = null;
+    } else {
+      // Work on a clone so the caller's file position is not disturbed:
+      final IndexInput indexClone = indexIn.clone();
+      //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
+      indexClone.seek(indexStartFP);
+      index = new FST<>(indexClone, VersionBlockTreeTermsWriter.getFSTOutputs());
+
+      /*
+        if (false) {
+        final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+        Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+        Util.toDot(index, w, false, false);
+        System.out.println("FST INDEX: SAVED to " + dotFileName);
+        w.close();
+        }
+      */
+    }
+  }
+
+  /** Returns the stored min term, or defers to the superclass when none was stored. */
+  @Override
+  public BytesRef getMin() throws IOException {
+    // A null minTerm means an older index that didn't store min/maxTerm
+    return minTerm != null ? minTerm : super.getMin();
+  }
+
+  /** Returns the stored max term, or defers to the superclass when none was stored. */
+  @Override
+  public BytesRef getMax() throws IOException {
+    // A null maxTerm means an older index that didn't store min/maxTerm
+    return maxTerm != null ? maxTerm : super.getMax();
+  }
+
+  /** True when the field's index options are at least DOCS_AND_FREQS. */
+  @Override
+  public boolean hasFreqs() {
+    final IndexOptions options = fieldInfo.getIndexOptions();
+    return options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+  }
+
+  /** True when the field's index options are at least DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. */
+  @Override
+  public boolean hasOffsets() {
+    final IndexOptions options = fieldInfo.getIndexOptions();
+    return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+  }
+
+  /** True when the field's index options are at least DOCS_AND_FREQS_AND_POSITIONS. */
+  @Override
+  public boolean hasPositions() {
+    final IndexOptions options = fieldInfo.getIndexOptions();
+    return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+  }
+
+  /** Delegates to {@link FieldInfo#hasPayloads()}. */
+  @Override
+  public boolean hasPayloads() {
+    return fieldInfo.hasPayloads();
+  }
+
+  /** Always returns a new enum over this field; {@code reuse} is ignored. */
+  @Override
+  public TermsEnum iterator(TermsEnum reuse) throws IOException {
+    return new IDVersionSegmentTermsEnum(this);
+  }
+
+  /** Returns the number of terms recorded for this field. */
+  @Override
+  public long size() {
+    return numTerms;
+  }
+
+  /** Returns the stored sumTotalTermFreq statistic. */
+  @Override
+  public long getSumTotalTermFreq() {
+    return sumTotalTermFreq;
+  }
+
+  /** Returns the stored sumDocFreq statistic. */
+  @Override
+  public long getSumDocFreq() {
+    return sumDocFreq;
+  }
+
+  /** Returns the stored docCount statistic. */
+  @Override
+  public int getDocCount() {
+    return docCount;
+  }
+
+  /** Returns approximate RAM bytes used: the FST's size, or 0 when no index was loaded. */
+  public long ramBytesUsed() {
+    if (index == null) {
+      return 0;
+    }
+    return index.sizeInBytes();
+  }
+}
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
@ -86,7 +86,7 @@ import org.apache.lucene.util.fst.Util;
 * @lucene.experimental
 */

-public class BlockTreeTermsReader extends FieldsProducer {
+public final class BlockTreeTermsReader extends FieldsProducer {

  // Open input to the main terms dict file (_X.tib)
  final IndexInput in;
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
@ -187,7 +187,7 @@ import org.apache.lucene.util.packed.PackedInts;
 * @see BlockTreeTermsReader
 * @lucene.experimental
 */
-public class BlockTreeTermsWriter extends FieldsConsumer {
+public final class BlockTreeTermsWriter extends FieldsConsumer {

  /** Suggested default value for the {@code
   *  minItemsInBlock} parameter to {@link
@ -1045,12 +1045,12 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
      // terms into "good" blocks; we don't save the
      // resulting FST:
      blockBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
-                                         0, 0, true,
-                                         true, Integer.MAX_VALUE,
-                                         noOutputs,
-                                         new FindBlocks(), false,
-                                         PackedInts.COMPACT,
-                                         true, 15);
+                                   0, 0, true,
+                                   true, Integer.MAX_VALUE,
+                                   noOutputs,
+                                   new FindBlocks(), false,
+                                   PackedInts.COMPACT,
+                                   true, 15);

      this.longsSize = postingsWriter.setField(fieldInfo);
    }