LUCENE-5675: add testRandom; sometimes fails

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595229 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-05-16 15:17:30 +00:00
parent 83332c046b
commit fa51d5972a
10 changed files with 488 additions and 179 deletions

View File

@ -45,6 +45,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// Lazy init:
IndexInput in;
private static boolean DEBUG = true;
private IDVersionSegmentTermsEnumFrame[] stack;
private final IDVersionSegmentTermsEnumFrame staticFrame;
IDVersionSegmentTermsEnumFrame currentFrame;
@ -214,13 +216,27 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
return seekExact(target, 0);
}
// for debugging
@SuppressWarnings("unused")
private String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's e.g. a
// prefix of UTF8 that ends mid-unicode-char, we
// fall back to hex:
return b.toString();
}
}
/** Returns false if the term does not exist, or it exists but its version is < minIDVersion. */
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
}
System.out.println("seekExact target=" + target + " minIDVersion=" + minIDVersion);
// nocommit would be nice if somehow on doing deletes we didn't have to double-lookup again...
if (term.bytes.length <= target.length) {
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
@ -228,10 +244,10 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
assert clearEOF();
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + segment + " target=" + fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
if (DEBUG) {
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
printSeekState(System.out);
}
FST.Arc<Pair<BytesRef,Long>> arc;
int targetUpto;
@ -239,6 +255,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
targetBeforeCurrentLength = currentFrame.ord;
// nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check?
if (currentFrame != staticFrame) {
// We are already seek'd; find the common
@ -248,9 +266,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// seeks to foobaz, we can re-use the seek state
// for the first 5 bytes.
// if (DEBUG) {
// System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
// }
if (DEBUG) {
System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
}
arc = arcs[0];
assert arc.isFinal();
@ -258,7 +276,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
targetUpto = 0;
IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
assert validIndexPrefix <= term.length;
assert validIndexPrefix <= term.length: "validIndexPrefix=" + validIndexPrefix + " term.length=" + term.length + " seg=" + fr.parent.segment;
final int targetLimit = Math.min(target.length, validIndexPrefix);
@ -270,9 +288,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// First compare up to valid seek frames:
while (targetUpto < targetLimit) {
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
// }
if (DEBUG) {
System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
}
if (cmp != 0) {
break;
}
@ -300,9 +318,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
final int targetLimit2 = Math.min(target.length, term.length);
while (targetUpto < targetLimit2) {
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (DEBUG) {
System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
}
if (cmp != 0) {
break;
}
@ -319,9 +337,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// Common case: target term is after current
// term, ie, app is seeking multiple terms
// in sorted order
// if (DEBUG) {
// System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
// }
if (DEBUG) {
System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto);
}
currentFrame = lastFrame;
} else if (cmp > 0) {
@ -330,23 +348,41 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// keep the currentFrame but we must rewind it
// (so we scan from the start)
targetBeforeCurrentLength = 0;
// if (DEBUG) {
// System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
// }
if (DEBUG) {
System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
}
currentFrame = lastFrame;
currentFrame.rewind();
} else {
// Target is exactly the same as current term
assert term.length == target.length;
if (termExists) {
// if (DEBUG) {
// System.out.println(" target is same as current; return true");
// }
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
if (DEBUG) {
System.out.println(" target is same as current maxIDVersion=" + currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false");
}
return false;
}
currentFrame.decodeMetaData();
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
// The version for this term is lower than the minVersion
if (DEBUG) {
System.out.println(" target is same as current but version=" + ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" + minIDVersion + "; return false");
}
return false;
}
if (DEBUG) {
System.out.println(" target is same as current; return true");
}
return true;
} else {
// if (DEBUG) {
// System.out.println(" target is same as current but term doesn't exist");
// }
if (DEBUG) {
System.out.println(" target is same as current but term doesn't exist");
}
}
//validIndexPrefix = currentFrame.depth;
//term.length = target.length;
@ -357,15 +393,15 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
targetBeforeCurrentLength = -1;
arc = fr.index.getFirstArc(arcs[0]);
System.out.println("first arc=" + arc);
//System.out.println("first arc=" + arc);
// Empty string prefix must have an output (block) in the index!
assert arc.isFinal();
assert arc.output != null;
// if (DEBUG) {
// System.out.println(" no seek state; push root frame");
// }
if (DEBUG) {
System.out.println(" no seek state; push root frame");
}
output = arc.output;
@ -376,9 +412,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
// }
if (DEBUG) {
System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
}
while (targetUpto < target.length) {
@ -389,9 +425,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
if (nextArc == null) {
// Index is exhausted
// if (DEBUG) {
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
// }
if (DEBUG) {
System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + Integer.toHexString(targetLabel));
}
validIndexPrefix = currentFrame.prefix;
//validIndexPrefix = targetUpto;
@ -402,15 +438,21 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
termExists = false;
term.bytes[targetUpto] = (byte) targetLabel;
term.length = 1+targetUpto;
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// }
if (DEBUG) {
System.out.println(" FAST NOT_FOUND term=" + brToString(term));
}
return false;
}
System.out.println(" check output=" +((output.output2)));
//System.out.println(" check maxVersion=" + currentFrame.maxIDVersion + " vs " + minIDVersion);
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
//termExists = false;
//term.bytes[targetUpto] = (byte) targetLabel;
//term.length = 1+targetUpto;
if (DEBUG) {
System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix);
}
return false;
}
@ -418,20 +460,24 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
// }
currentFrame.decodeMetaData();
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
// The version for this term is lower than the minVersion
if (DEBUG) {
System.out.println(" return NOT_FOUND: idVersion=" + ((IDVersionTermState) currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion);
}
return false;
}
if (DEBUG) {
System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
}
return true;
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
// }
if (DEBUG) {
System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
}
return false;
}
} else {
@ -444,15 +490,15 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
// if (DEBUG) {
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
if (DEBUG) {
System.out.println(" index: follow label=" + Integer.toHexString((target.bytes[target.offset + targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
}
targetUpto++;
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}
}
@ -466,9 +512,16 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
if (!currentFrame.hasTerms) {
termExists = false;
term.length = targetUpto;
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// }
if (DEBUG) {
System.out.println(" FAST NOT_FOUND term=" + brToString(term));
}
return false;
}
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
termExists = false;
term.length = targetUpto;
return false;
}
@ -476,14 +529,19 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
// }
if (DEBUG) {
System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
}
currentFrame.decodeMetaData();
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
// The version for this term is lower than the minVersion
return false;
}
return true;
} else {
// if (DEBUG) {
// System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
// }
if (DEBUG) {
System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
}
return false;
}
@ -969,4 +1027,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
public long ord() {
throw new UnsupportedOperationException();
}
@Override
public String toString() {
return "IDVersionSegmentTermsEnum(seg=" + fr.parent.segment + ")";
}
}
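A minimal caller sketch of the two-argument seekExact contract documented above ("returns false if the term does not exist, or it exists but its version is < minIDVersion"); the enum is assumed to come from a field written with this postings format, as in the tests further down:

    // termsEnum: an IDVersionSegmentTermsEnum for the "id" field
    BytesRef id = new BytesRef("id0");
    if (termsEnum.seekExact(id, 100L)) {
      // id exists and its indexed version is >= 100
    } else {
      // id is absent, or its version is < 100
    }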

View File

@ -36,6 +36,7 @@ final class IDVersionSegmentTermsEnumFrame {
boolean hasTermsOrig;
boolean isFloor;
/** Highest version of any term in this block. */
long maxIDVersion;
FST.Arc<Pair<BytesRef,Long>> arc;
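This maxIDVersion field is what the enum-side changes above test against: when the highest version in a block is still below the requested minimum, the whole block can be rejected without decoding any term metadata. Condensed from the seekExact diff, as a sketch:

    if (currentFrame.maxIDVersion < minIDVersion) {
      // No term in this block can satisfy the version requirement; fail fast.
      return false;
    }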

View File

@ -27,7 +27,12 @@ class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum {
private int singleDocID;
private Bits liveDocs;
private long version;
private final BytesRef payload = new BytesRef(8);
private final BytesRef payload;
public SingleDocsAndPositionsEnum() {
payload = new BytesRef(8);
payload.length = 8;
}
/** For reuse */
public void reset(int singleDocID, long version, Bits liveDocs) {
@ -35,7 +40,6 @@ class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum {
this.liveDocs = liveDocs;
this.singleDocID = singleDocID;
this.version = version;
pos = -1;
}
@Override
@ -45,7 +49,7 @@ class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum {
} else {
doc = NO_MORE_DOCS;
}
pos = 0;
pos = -1;
return doc;
}
@ -59,6 +63,7 @@ class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum {
public int advance(int target) {
if (doc == -1 && target <= singleDocID && (liveDocs == null || liveDocs.get(singleDocID))) {
doc = singleDocID;
pos = -1;
} else {
doc = NO_MORE_DOCS;
}
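Moving the pos = -1 assignment out of reset() and into nextDoc()/advance() re-arms the single position on every successful doc transition rather than once per reuse. A hedged consumption sketch using the stock 4.x positions API:

    // posEnum: a (possibly reused) SingleDocsAndPositionsEnum after reset(...)
    if (posEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      posEnum.nextPosition();                   // the one and only position
      BytesRef payload = posEnum.getPayload();  // the 8-byte version payload
    }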

View File

@ -161,7 +161,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
in.readBytes(code.bytes, 0, numBytes);
code.length = numBytes;
final long version = in.readVLong();
System.out.println(" read code=" +code + " version=" + version);
final Pair<BytesRef,Long> rootCode = VersionBlockTreeTermsWriter.FST_OUTPUTS.newPair(code, version);
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
assert fieldInfo != null: "field=" + field;

View File

@ -194,10 +194,10 @@ import org.apache.lucene.util.packed.PackedInts;
// nocommit fix jdocs
final class VersionBlockTreeTermsWriter extends FieldsConsumer {
public static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
PositiveIntOutputs.getSingleton());
public static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();
static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();
/** Suggested default value for the {@code
* minItemsInBlock} parameter to {@link
@ -284,7 +284,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
}
private final List<FieldMetaData> fields = new ArrayList<>();
// private final String segment;
private final String segment;
/** Create a new writer. The number of items (terms or
* sub-blocks) per block will aim to be between
@ -297,6 +297,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
int maxItemsInBlock)
throws IOException
{
System.out.println("VBTTW minItemsInBlock=" + minItemsInBlock + " maxItemsInBlock=" + maxItemsInBlock);
if (minItemsInBlock <= 1) {
throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
}
@ -329,7 +330,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
writeIndexHeader(indexOut);
this.postingsWriter = postingsWriter;
// segment = state.segmentName;
segment = state.segmentInfo.name;
// System.out.println("BTW.init seg=" + state.segmentName);
@ -625,6 +626,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// following floor blocks:
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
// nocommit why can't we do floor blocks for root frame?
if (prefixLength == 0 || count <= maxItemsInBlock) {
// Easy case: not floor block. Eg, prefix is "foo",
// and we found 30 terms/sub-blocks starting w/ that
@ -644,13 +646,13 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// in each block, to make floor blocks authoritative
//if (DEBUG) {
// final BytesRef prefix = new BytesRef(prefixLength);
// for(int m=0;m<prefixLength;m++) {
// prefix.bytes[m] = (byte) prevTerm.ints[m];
// }
// prefix.length = prefixLength;
// //System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
// System.out.println("writeBlocks: prefix=" + prefix + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
final BytesRef prefix = new BytesRef(prefixLength);
for(int m=0;m<prefixLength;m++) {
prefix.bytes[m] = (byte) prevTerm.ints[m];
}
prefix.length = prefixLength;
//System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
//}
//System.out.println("\nwbs count=" + count);
@ -873,7 +875,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
// if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
// }
// 1st pass: pack term suffix bytes into byte[] blob
@ -909,6 +911,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
boolean absolute = true;
long maxVersionInBlock = -1;
int countx = 0;
if (isLeafBlock) {
subIndices = null;
for (PendingEntry ent : slice) {
@ -918,10 +921,10 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + suffixBytes);
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
// }
// For leaf block we write suffix straight
suffixWriter.writeVInt(suffix);
@ -955,10 +958,10 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + suffixBytes);
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
// }
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
@ -1005,10 +1008,10 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
assert block.fp < startFP;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
suffixWriter.writeVLong(startFP - block.fp);

View File

@ -17,16 +17,32 @@ package org.apache.lucene.codecs.idversion;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PerThreadPKLookup;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
@ -47,26 +63,219 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
Document doc = new Document();
doc.add(makeIDField("id0", 100));
w.addDocument(doc);
doc = new Document();
doc.add(makeIDField("id1", 110));
w.addDocument(doc);
IndexReader r = w.getReader();
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator(null);
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 100));
assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
assertTrue(termsEnum.seekExact(new BytesRef("id1"), 50));
assertTrue(termsEnum.seekExact(new BytesRef("id1"), 110));
assertFalse(termsEnum.seekExact(new BytesRef("id1"), 111));
r.close();
w.close();
dir.close();
}
// nocommit need testRandom
// nocommit vary the style of id; sometimes fixed-length ids, timestamps, zero-filled, sequential, random, etc.
public void testRandom() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
// nocommit randomize the block sizes:
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
// nocommit put back
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
int numDocs = atLeast(1000);
Map<String,Long> idValues = new HashMap<String,Long>();
int docUpto = 0;
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs);
}
long version = 0;
while (docUpto < numDocs) {
// nocommit add deletes in
// nocommit randomRealisticUnicode / full binary
String idValue = TestUtil.randomSimpleString(random());
if (idValues.containsKey(idValue)) {
continue;
}
//long version = random().nextLong() & 0x7fffffffffffffffL;
version++;
idValues.put(idValue, version);
if (VERBOSE) {
System.out.println(" " + idValue + " -> " + version);
}
Document doc = new Document();
doc.add(makeIDField(idValue, version));
w.addDocument(doc);
docUpto++;
}
//IndexReader r = w.getReader();
IndexReader r = DirectoryReader.open(w, true);
PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
List<Map.Entry<String,Long>> idValuesList = new ArrayList<>(idValues.entrySet());
int iters = numDocs * 5;
for(int iter=0;iter<iters;iter++) {
String idValue;
if (random().nextBoolean()) {
idValue = idValuesList.get(random().nextInt(numDocs)).getKey();
} else {
idValue = TestUtil.randomSimpleString(random());
}
BytesRef idValueBytes = new BytesRef(idValue);
Long expectedVersion = idValues.get(idValue);
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
}
if (expectedVersion == null) {
assertEquals(-1, lookup.lookup(idValueBytes));
} else {
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" lookup exact version (should be found)");
}
assertTrue(lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
} else {
if (VERBOSE) {
System.out.println(" lookup version+1 (should not be found)");
}
assertEquals(-1, lookup.lookup(idValueBytes, expectedVersion.longValue()+1));
}
}
}
r.close();
w.close();
dir.close();
}
private static class PerThreadVersionPKLookup extends PerThreadPKLookup {
public PerThreadVersionPKLookup(IndexReader r, String field) throws IOException {
super(r, field);
}
/** Returns docID if found, else -1. */
public int lookup(BytesRef id, long version) throws IOException {
for(int seg=0;seg<numSegs;seg++) {
if (((IDVersionSegmentTermsEnum) termsEnums[seg]).seekExact(id, version)) {
if (VERBOSE) {
System.out.println(" found in seg=" + termsEnums[seg]);
}
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
int docID = docsEnums[seg].nextDoc();
if (docID != DocsEnum.NO_MORE_DOCS) {
return docBases[seg] + docID;
}
assert hasDeletions;
}
}
return -1;
}
}
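Hypothetical standalone usage of the subclass above, mirroring what testRandom asserts; the reader instance and field name are assumptions:

    PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(reader, "id");
    int docID = lookup.lookup(new BytesRef("someId"), 7L);
    // docID != -1 only if a live doc holds "someId" with version >= 7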
/** Produces a single token from the provided value, with the provided payload. */
private static class StringAndPayloadField extends Field {
public static final FieldType TYPE = new FieldType();
static {
TYPE.setIndexed(true);
TYPE.setOmitNorms(true);
TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
TYPE.setTokenized(true);
TYPE.freeze();
}
private final BytesRef payload;
public StringAndPayloadField(String name, String value, BytesRef payload) {
super(name, value, TYPE);
this.payload = payload;
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
SingleTokenWithPayloadTokenStream ts;
if (reuse instanceof SingleTokenWithPayloadTokenStream) {
ts = (SingleTokenWithPayloadTokenStream) reuse;
} else {
ts = new SingleTokenWithPayloadTokenStream();
}
ts.setValue((String) fieldsData, payload);
return ts;
}
}
private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
private boolean used = false;
private String value = null;
private BytesRef payload;
/** Creates a new TokenStream that returns a String+payload as a single token.
* <p>Warning: Does not initialize the value; you must call
* {@link #setValue(String, BytesRef)} afterwards!
*/
SingleTokenWithPayloadTokenStream() {
}
/** Sets the string value. */
void setValue(String value, BytesRef payload) {
this.value = value;
this.payload = payload;
}
@Override
public boolean incrementToken() {
if (used) {
return false;
}
clearAttributes();
termAttribute.append(value);
payloadAttribute.setPayload(payload);
used = true;
return true;
}
@Override
public void reset() {
used = false;
}
@Override
public void close() {
value = null;
payload = null;
}
}
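A hedged usage sketch for the single-token stream above, honoring the setValue warning in its javadoc (the payload construction follows makeIDField below):

    SingleTokenWithPayloadTokenStream ts = new SingleTokenWithPayloadTokenStream();
    ts.setValue("id0", payload);   // must be set before consuming
    ts.reset();
    while (ts.incrementToken()) {
      // exactly one token: term "id0" carrying the 8-byte payload
    }
    ts.close();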
private static Field makeIDField(String id, long version) {
Field field = newTextField("id", "", Field.Store.NO);
Token token = new Token(id, 0, id.length());
BytesRef payload = new BytesRef(8);
payload.length = 8;
IDVersionPostingsFormat.longToBytes(100, payload);
IDVersionPostingsFormat.longToBytes(version, payload);
return new StringAndPayloadField("id", id, payload);
/*
Field field = newTextField("id", "", Field.Store.NO);
Token token = new Token(id, 0, id.length());
token.setPayload(payload);
field.setTokenStream(new CannedTokenStream(token));
return field;
*/
}
}

View File

@ -46,6 +46,7 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
@ -189,6 +190,10 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public final class BlockTreeTermsWriter extends FieldsConsumer {
static final Outputs<BytesRef> FST_OUTPUTS = ByteSequenceOutputs.getSingleton();
static final BytesRef NO_OUTPUT = FST_OUTPUTS.getNoOutput();
/** Suggested default value for the {@code
* minItemsInBlock} parameter to {@link
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */

View File

@ -40,9 +40,6 @@ import org.apache.lucene.util.fst.Util;
/** Iterates through terms in this field */
final class SegmentTermsEnum extends TermsEnum {
final static Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
final static BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
// Lazy init:
IndexInput in;
@ -366,8 +363,8 @@ final class SegmentTermsEnum extends TermsEnum {
//System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
//}
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != BlockTreeTermsWriter.NO_OUTPUT) {
output = BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
if (arc.isFinal()) {
lastFrame = stack[1+lastFrame.ord];
@ -457,7 +454,7 @@ final class SegmentTermsEnum extends TermsEnum {
//term.length = 0;
targetUpto = 0;
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
currentFrame = pushFrame(arc, BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
}
// if (DEBUG) {
@ -512,8 +509,8 @@ final class SegmentTermsEnum extends TermsEnum {
term.bytes[targetUpto] = (byte) targetLabel;
// Aggregate output as we go:
assert arc.output != null;
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != BlockTreeTermsWriter.NO_OUTPUT) {
output = BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
// if (DEBUG) {
@ -523,7 +520,7 @@ final class SegmentTermsEnum extends TermsEnum {
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
currentFrame = pushFrame(arc, BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}
@ -628,8 +625,8 @@ final class SegmentTermsEnum extends TermsEnum {
// seek; but, often the FST doesn't have any
// shared bytes (but this could change if we
// reverse vLong byte order)
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != BlockTreeTermsWriter.NO_OUTPUT) {
output = BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
if (arc.isFinal()) {
lastFrame = stack[1+lastFrame.ord];
@ -714,7 +711,7 @@ final class SegmentTermsEnum extends TermsEnum {
//term.length = 0;
targetUpto = 0;
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
currentFrame = pushFrame(arc, BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
}
//if (DEBUG) {
@ -769,8 +766,8 @@ final class SegmentTermsEnum extends TermsEnum {
arc = nextArc;
// Aggregate output as we go:
assert arc.output != null;
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != BlockTreeTermsWriter.NO_OUTPUT) {
output = BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
//if (DEBUG) {
@ -780,7 +777,7 @@ final class SegmentTermsEnum extends TermsEnum {
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
currentFrame = pushFrame(arc, BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}

View File

@ -966,74 +966,4 @@ public class TestTermsEnum extends LuceneTestCase {
w.close();
d.close();
}
/** Utility class to do efficient primary-key (only 1 doc contains the
* given term) lookups by segment, re-using the enums. This class is
* not thread safe, so it is the caller's job to create and use one
* instance of this per thread. Do not use this if a term may appear
* in more than one document! It will only return the first one it
* finds. */
static class PerThreadPKLookup {
private final TermsEnum[] termsEnums;
private final DocsEnum[] docsEnums;
private final Bits[] liveDocs;
private final int[] docBases;
private final int numSegs;
private final boolean hasDeletions;
public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {
List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
// Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
@Override
public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
return c2.reader().numDocs() - c1.reader().numDocs();
}
});
termsEnums = new TermsEnum[leaves.size()];
docsEnums = new DocsEnum[leaves.size()];
liveDocs = new Bits[leaves.size()];
docBases = new int[leaves.size()];
int numSegs = 0;
boolean hasDeletions = false;
for(int i=0;i<leaves.size();i++) {
Fields fields = leaves.get(i).reader().fields();
if (fields != null) {
Terms terms = fields.terms(idFieldName);
if (terms != null) {
termsEnums[numSegs] = terms.iterator(null);
assert termsEnums[numSegs] != null;
docBases[numSegs] = leaves.get(i).docBase;
liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
hasDeletions |= leaves.get(i).reader().hasDeletions();
numSegs++;
}
}
}
this.numSegs = numSegs;
this.hasDeletions = hasDeletions;
}
/** Returns docID if found, else -1. */
public int lookup(BytesRef id) throws IOException {
for(int seg=0;seg<numSegs;seg++) {
if (termsEnums[seg].seekExact(id)) {
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
int docID = docsEnums[seg].nextDoc();
if (docID != DocsEnum.NO_MORE_DOCS) {
return docBases[seg] + docID;
}
assert hasDeletions;
}
}
return -1;
}
// TODO: add reopen method to carry over re-used enums...?
}
}

View File

@ -0,0 +1,97 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/** Utility class to do efficient primary-key (only 1 doc contains the
* given term) lookups by segment, re-using the enums. This class is
* not thread safe, so it is the caller's job to create and use one
* instance of this per thread. Do not use this if a term may appear
* in more than one document! It will only return the first one it
* finds. */
public class PerThreadPKLookup {
protected final TermsEnum[] termsEnums;
protected final DocsEnum[] docsEnums;
protected final Bits[] liveDocs;
protected final int[] docBases;
protected final int numSegs;
protected final boolean hasDeletions;
public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {
List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
// Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
@Override
public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
return c2.reader().numDocs() - c1.reader().numDocs();
}
});
termsEnums = new TermsEnum[leaves.size()];
docsEnums = new DocsEnum[leaves.size()];
liveDocs = new Bits[leaves.size()];
docBases = new int[leaves.size()];
int numSegs = 0;
boolean hasDeletions = false;
for(int i=0;i<leaves.size();i++) {
Fields fields = leaves.get(i).reader().fields();
if (fields != null) {
Terms terms = fields.terms(idFieldName);
if (terms != null) {
termsEnums[numSegs] = terms.iterator(null);
assert termsEnums[numSegs] != null;
docBases[numSegs] = leaves.get(i).docBase;
liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
hasDeletions |= leaves.get(i).reader().hasDeletions();
numSegs++;
}
}
}
this.numSegs = numSegs;
this.hasDeletions = hasDeletions;
}
/** Returns docID if found, else -1. */
public int lookup(BytesRef id) throws IOException {
for(int seg=0;seg<numSegs;seg++) {
if (termsEnums[seg].seekExact(id)) {
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
int docID = docsEnums[seg].nextDoc();
if (docID != DocsEnum.NO_MORE_DOCS) {
return docBases[seg] + docID;
}
assert hasDeletions;
}
}
return -1;
}
// TODO: add reopen method to carry over re-used enums...?
}
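Hypothetical per-thread usage of the now-public utility; the reader instance and id field name are assumptions:

    // One instance per thread; enums are cached and reused across calls.
    PerThreadPKLookup pk = new PerThreadPKLookup(reader, "id");
    int docID = pk.lookup(new BytesRef("id42"));
    if (docID == -1) {
      // id42 is not present in any live document
    }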