LUCENE-5675: fix nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596512 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-05-21 08:57:59 +00:00
parent d6968c3924
commit 18d2cfaf9c
13 changed files with 184 additions and 269 deletions

View File

@@ -80,6 +80,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
lastDocID = docID;
lastPosition = -1;
lastVersion = -1;
}

@Override
@@ -94,7 +95,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
if (payload.length != 8) {
throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
}
lastVersion = IDVersionPostingsFormat.bytesToLong(payload);
if (lastVersion < 0) {
throw new IllegalArgumentException("version must be >= 0 (got: " + lastVersion + "; payload=" + payload + ")");

View File

@@ -230,15 +230,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
return ((IDVersionTermState) currentFrame.state).idVersion;
}

/** Returns false if the term deos not exist, or it exists but its version is too old (< minIDVersion). */
/** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
}
// nocommit would be nice if somehow on doing deletes we didn't have to double-lookup again...
if (term.bytes.length <= target.length) {
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
}

@@ -260,7 +258,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
boolean changed = false;
// nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check?
// TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can check?
if (currentFrame != staticFrame) {

@@ -380,7 +378,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
return false;
}
System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
// System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
if (DEBUG) {
System.out.println(" target is same as current; return true");

View File

@@ -220,11 +220,10 @@ final class IDVersionSegmentTermsEnumFrame {
}

void rewind() {
// Force reload:
fp = fpOrig;
nextEnt = -1;
// nocommit move to BT too?
//state.termBlockOrd = 0;
hasTerms = hasTermsOrig;
if (isFloor) {
floorDataReader.rewind();

@@ -390,8 +389,7 @@ final class IDVersionSegmentTermsEnumFrame {
public void decodeMetaData() throws IOException {
//if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
//if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
assert nextEnt >= 0;

View File

@@ -136,9 +136,7 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
}

// verify
if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(indexIn);
}
CodecUtil.checksumEntireFile(indexIn);

// Have PostingsReader init itself
postingsReader.init(in);

@@ -167,15 +165,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
final int longsSize = version >= VersionBlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
final int longsSize = in.readVInt();
BytesRef minTerm, maxTerm;
if (version >= VersionBlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
minTerm = readBytesRef(in);
maxTerm = readBytesRef(in);
} else {
minTerm = maxTerm = null;
}
BytesRef minTerm = readBytesRef(in);
BytesRef maxTerm = readBytesRef(in);
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}

@@ -217,9 +210,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_CODEC_NAME,
VersionBlockTreeTermsWriter.VERSION_START,
VersionBlockTreeTermsWriter.VERSION_CURRENT);
if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
dirOffset = input.readLong();
}
return version;
}

@@ -228,22 +218,14 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
VersionBlockTreeTermsWriter.VERSION_START,
VersionBlockTreeTermsWriter.VERSION_CURRENT);
if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
indexDirOffset = input.readLong();
}
return version;
}

/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
} else if (version >= VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
input.seek(dirOffset);
}

@@ -306,12 +288,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
@Override
public void checkIntegrity() throws IOException {
if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
// term dictionary
CodecUtil.checksumEntireFile(in);
// postings
postingsReader.checkIntegrity();
}
// term dictionary
CodecUtil.checksumEntireFile(in);
// postings
postingsReader.checkIntegrity();
}
}

View File

@@ -84,118 +84,19 @@ import org.apache.lucene.util.packed.PackedInts;
*/

/**
* This is just like {@link BlockTreeTermsWriter}, except it also stores a version per term, and adds a method to its TermsEnum
* implementation to seekExact only if the version is >= the specified version. The version is added to the terms index to avoid seeking if
* no term in the block has a high enough version. The term blocks file is .tiv and the terms index extension is .tipv.
* Block-based terms index and dictionary writer.
* <p>
* Writes terms dict and index, block-encoding (column
* stride) each term's metadata for each set of terms
* between two index terms.
* <p>
* Files:
* <ul>
* <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
* <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
* </ul>
* <p>
* <a name="Termdictionary" id="Termdictionary"></a>
* <h3>Term Dictionary</h3>
*
* <p>The .tim file contains the list of terms in each
* field along with per-term statistics (such as docfreq)
* and per-term metadata (typically pointers to the postings list
* for that term in the inverted index).
* </p>
*
* <p>The .tim is arranged in blocks: with blocks containing
* a variable number of entries (by default 25-48), where
* each entry is either a term or a reference to a
* sub-block.</p>
*
* <p>NOTE: The term dictionary can plug into different postings implementations:
* the postings writer/reader are actually responsible for encoding
* and decoding the Postings Metadata and Term Metadata sections.</p>
*
* <ul>
* <li>TermsDict (.tim) --&gt; Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
* FieldSummary, DirOffset, Footer</li>
* <li>NodeBlock --&gt; (OuterNode | InnerNode)</li>
* <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata</i>&gt;<sup>EntryCount</sup></li>
* <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata ? </i>&gt;<sup>EntryCount</sup></li>
* <li>TermStats --&gt; DocFreq, TotalTermFreq </li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
* SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm&gt;<sup>NumFields</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>MinTerm,MaxTerm --&gt; {@link DataOutput#writeVInt VInt} length followed by the byte[]</li>
* <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
* FieldNumber,RootCodeLength,DocCount,LongsSize --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --&gt;
* {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
* for the BlockTree implementation.</li>
* <li>DirOffset is a pointer to the FieldSummary section.</li>
* <li>DocFreq is the count of documents which contain the term.</li>
* <li>TotalTermFreq is the total number of occurrences of the term. This is encoded
* as the difference between the total number of occurrences and the DocFreq.</li>
* <li>FieldNumber is the fields number from {@link FieldInfos}. (.fnm)</li>
* <li>NumTerms is the number of unique terms for the field.</li>
* <li>RootCode points to the root block for the field.</li>
* <li>SumDocFreq is the total number of postings, the number of term-document pairs across
* the entire field.</li>
* <li>DocCount is the number of documents that have at least one posting for this field.</li>
* <li>LongsSize records how many long values the postings writer/reader record per term
* (e.g., to hold freq/prox/doc file offsets).
* <li>MinTerm, MaxTerm are the lowest and highest term in this field.</li>
* <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
* these contain arbitrary per-file data (such as parameters or versioning information)
* and per-term data (such as pointers to inverted files).</li>
* <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points
* to child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted </li>
* </ul>
* <a name="Termindex" id="Termindex"></a>
* <h3>Term Index</h3>
* <p>The .tip file contains an index into the term dictionary, so that it can be
* accessed randomly. The index is also used to determine
* when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
* <ul>
* <li>TermsIndex (.tip) --&gt; Header, FSTIndex<sup>NumFields</sup>
* &lt;IndexStartFP&gt;<sup>NumFields</sup>, DirOffset, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>IndexStartFP --&gt; {@link DataOutput#writeVLong VLong}</li>
* <!-- TODO: better describe FST output here -->
* <li>FSTIndex --&gt; {@link FST FST&lt;byte[]&gt;}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>The .tip file contains a separate FST for each
* field. The FST maps a term prefix to the on-disk
* block that holds all terms starting with that
* prefix. Each field's IndexStartFP points to its
* FST.</li>
* <li>DirOffset is a pointer to the start of the IndexStartFPs
* for all fields</li>
* <li>It's possible that an on-disk block would contain
* too many terms (more than the allowed maximum
* (default: 48)). When this happens, the block is
* sub-divided into new blocks (called "floor
* blocks"), and then the output in the FST for the
* block's prefix encodes the leading byte of each
* sub-block, and its file pointer.
* </ul>
*
* @see BlockTreeTermsReader
* @lucene.experimental
*/
// nocommit fix jdocs
final class VersionBlockTreeTermsWriter extends FieldsConsumer {

private static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;

static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
PositiveIntOutputs.getSingleton());

static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();

@@ -224,25 +125,11 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
/** Initial terms format. */
public static final int VERSION_START = 0;
// nocommit nuke all these old versions
/** Append-only */
public static final int VERSION_APPEND_ONLY = 1;
/** Meta data as array */
public static final int VERSION_META_ARRAY = 2;
/** checksums */
public static final int VERSION_CHECKSUM = 3;
/** min/max term */
public static final int VERSION_MIN_MAX_TERMS = 4;
/** Current terms format. */
public static final int VERSION_CURRENT = VERSION_MIN_MAX_TERMS;
public static final int VERSION_CURRENT = VERSION_START;

/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
static final String TERMS_INDEX_EXTENSION = "tipv";

final static String TERMS_INDEX_CODEC_NAME = "VERSION_BLOCK_TREE_TERMS_INDEX";

private final IndexOutput out;
@@ -297,7 +184,6 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
int maxItemsInBlock)
throws IOException
{
System.out.println("VBTTW minItemsInBlock=" + minItemsInBlock + " maxItemsInBlock=" + maxItemsInBlock);
if (minItemsInBlock <= 1) {
throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
}
@@ -626,8 +512,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// following floor blocks:
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
// nocommit why can't we do floor blocks for root frame?
if (prefixLength == 0 || count <= maxItemsInBlock) {
if (count <= maxItemsInBlock) {
// Easy case: not floor block. Eg, prefix is "foo",
// and we found 30 terms/sub-blocks starting w/ that
// prefix, and minItemsInBlock <= 30 <=
@@ -645,7 +530,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// TODO: we could store min & max suffix start byte
// in each block, to make floor blocks authoritative
//if (DEBUG) {
if (DEBUG) {
final BytesRef prefix = new BytesRef(prefixLength);
for(int m=0;m<prefixLength;m++) {
prefix.bytes[m] = (byte) prevTerm.ints[m];

@@ -653,7 +538,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;
//System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
//}
}
//System.out.println("\nwbs count=" + count);
final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];

@@ -874,9 +759,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// Write block header:
out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
// if (DEBUG) {
if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
// }
}

// 1st pass: pack term suffix bytes into byte[] blob
// TODO: cutover to bulk int codec... simple64?

@@ -920,12 +805,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
BlockTermState state = term.state;
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
// }
}
// For leaf block we write suffix straight
suffixWriter.writeVInt(suffix);
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);

@@ -957,12 +842,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
BlockTermState state = term.state;
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
// }
}
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
suffixWriter.writeVInt(suffix<<1);

@@ -1007,12 +892,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
assert block.fp < startFP;
// if (DEBUG) {
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
}
suffixWriter.writeVLong(startFP - block.fp);
subIndices.add(block.index);
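The writer above keeps a running maxVersionInBlock so that, per the new javadoc, a reader can avoid seeking into a block whose highest version is below the requested minimum. A small stand-alone sketch of that bookkeeping (the block layout and names are illustrative, not the writer's real data structures):

```java
// Sketch of per-block max-version bookkeeping: the writer records the largest
// version in each block so a reader can skip any block whose max is too low.
public final class MaxVersionPerBlockSketch {
  static long maxVersionInBlock(long[] versions, int from, int to) {
    long max = -1;
    for (int i = from; i < to; i++) {
      max = Math.max(max, versions[i]);
    }
    return max;
  }

  public static void main(String[] args) {
    long[] versions = {3, 9, 4, 12, 1, 2};
    long blockMax = maxVersionInBlock(versions, 0, 3); // block holding versions 3, 9, 4
    long minIDVersion = 10;
    // Reader-side check: nothing in this block can satisfy minIDVersion, so skip it.
    System.out.println(minIDVersion > blockMax ? "skip block" : "descend into block");
  }
}
```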

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;

// nocommit can we take a BytesRef token instead?
// TODO: can we take a BytesRef token instead?

/** Produces a single String token from the provided value, with the provided payload. */
class StringAndPayloadField extends Field {

View File

@@ -49,6 +49,7 @@ import org.apache.lucene.index.PerThreadPKLookup;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -90,16 +91,16 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
String next();
}

// nocommit make a similar test for BT, w/ varied IDs:
// TODO make a similar test for BT, w/ varied IDs:

public void testRandom() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
// nocommit randomize the block sizes:
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
// nocommit put back
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
int maxItemsInBlock = 2*(minItemsInBlock-1) + random().nextInt(50);
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
//IndexWriter w = new IndexWriter(dir, iwc);
int numDocs = atLeast(1000);
Map<String,Long> idValues = new HashMap<String,Long>();
int docUpto = 0;
@@ -210,9 +211,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
}

List<String> idsList = new ArrayList<>();
long version = 0;
while (docUpto < numDocs) {
// nocommit add deletes in
String idValue = idPrefix + ids.next();
if (idValues.containsKey(idValue)) {
continue;

@@ -229,11 +231,38 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
Document doc = new Document();
doc.add(makeIDField(idValue, version));
w.addDocument(doc);
idsList.add(idValue);
if (idsList.size() > 0 && random().nextInt(7) == 5) {
// Randomly delete or update a previous ID
idValue = idsList.get(random().nextInt(idsList.size()));
if (random().nextBoolean()) {
if (useMonotonicVersion) {
version += TestUtil.nextInt(random(), 1, 10);
} else {
version = random().nextLong() & 0x7fffffffffffffffL;
}
doc = new Document();
doc.add(makeIDField(idValue, version));
if (VERBOSE) {
System.out.println(" update " + idValue + " -> " + version);
}
w.updateDocument(new Term("id", idValue), doc);
idValues.put(idValue, version);
} else {
if (VERBOSE) {
System.out.println(" delete " + idValue);
}
w.deleteDocuments(new Term("id", idValue));
idValues.remove(idValue);
}
}
docUpto++;
}

//IndexReader r = w.getReader();
IndexReader r = DirectoryReader.open(w, true);
IndexReader r = w.getReader();
//IndexReader r = DirectoryReader.open(w, true);

PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");

List<Map.Entry<String,Long>> idValuesList = new ArrayList<>(idValues.entrySet());
@@ -242,7 +271,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
String idValue;
if (random().nextBoolean()) {
idValue = idValuesList.get(random().nextInt(numDocs)).getKey();
idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
} else if (random().nextBoolean()) {
idValue = ids.next();
} else {
@@ -318,14 +347,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
payload.length = 8;
IDVersionPostingsFormat.longToBytes(version, payload);
return new StringAndPayloadField("id", id, payload);
/*
Field field = newTextField("id", "", Field.Store.NO);
Token token = new Token(id, 0, id.length());
token.setPayload(payload);
field.setTokenStream(new CannedTokenStream(token));
return field;
*/
}

public void testMoreThanOneDocPerIDOneSegment() throws Exception {
@@ -353,6 +374,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
iwc.setMergePolicy(new TieredMergePolicy());
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
@@ -362,7 +384,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
}
});
}
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
@@ -380,7 +402,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
// expected
assertTrue(ioe.getCause() instanceof IllegalArgumentException);
}
w.w.close();
w.close();
dir.close();
}
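The randomized testRandom() setup above derives maxItemsInBlock as 2*(minItemsInBlock-1) plus random slack, which keeps the pair within the writer's argument checks (minItemsInBlock must be >= 2). A quick sketch that exercises that invariant; the invariant itself is assumed from the formula used in the test, not stated elsewhere in this patch:

```java
import java.util.Random;

// Generates (minItemsInBlock, maxItemsInBlock) pairs the same way the test does
// and checks the assumed invariant maxItemsInBlock >= 2*(minItemsInBlock-1).
public final class BlockSizeSketch {
  public static void main(String[] args) {
    Random random = new Random();
    for (int iter = 0; iter < 1000; iter++) {
      int minItemsInBlock = 2 + random.nextInt(49);                         // 2..50
      int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random.nextInt(50); // always >= 2*(min-1)
      if (minItemsInBlock <= 1 || maxItemsInBlock < 2 * (minItemsInBlock - 1)) {
        throw new AssertionError("invalid pair: " + minItemsInBlock + "/" + maxItemsInBlock);
      }
    }
    System.out.println("all generated pairs satisfy the block-size invariant");
  }
}
```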

View File

@@ -37,7 +37,7 @@ import org.apache.lucene.util.Bits;
* time.
* @lucene.experimental */

// nocommit mv under blocktree? but ... it's used by others (e.g. block terms)
// TODO: maybe move under blocktree? but it's used by other terms dicts (e.g. Block)

// TODO: find a better name; this defines the API that the
// terms dict impls use to talk to a postings impl.

View File

@@ -1640,22 +1640,15 @@ public class CheckIndex {
// Again, with the one doc deleted:
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);

// Only agg stats if the doc is live:
final boolean doStats = liveDocs == null || liveDocs.get(j);
if (doStats == false) {
// nocommit is it OK to stop verifying deleted docs?
if (liveDocs != null && liveDocs.get(j) == false) {
// Only check live docs
continue;
}

if (doStats) {
status.docCount++;
}
status.docCount++;

for(String field : tfv) {
if (doStats) {
status.totVectors++;
}
status.totVectors++;

// Make sure FieldInfo thinks this field is vector'd:
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

View File

@@ -94,6 +94,8 @@ final class DefaultIndexingChain extends DocConsumer {
// aborting on any exception from this method
int numDocs = state.segmentInfo.getDocCount();

// TODO: we could set liveDocs earlier and then fix DVs to also not write deleted docs:
writeNorms(state);
writeDocValues(state);

View File

@@ -34,7 +34,6 @@ final class FreqProxTermsWriter extends TermsHash {
}

private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
System.out.println("applyDeletes segUpdates=" + state.segUpdates);
// Process any pending Term deletes for this newly
// flushed segment:

@@ -108,8 +107,6 @@ final class FreqProxTermsWriter extends TermsHash {
fields.setLiveDocs(state.liveDocs);
}

System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
boolean success = false;
try {

View File

@@ -1696,7 +1696,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
if (doWait) {
synchronized(this) {
while(true) {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
}

View File

@@ -17,8 +17,7 @@ package org.apache.lucene.uninverting;
* limitations under the License.
*/

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

@@ -47,14 +46,17 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

public class TestFieldCacheVsDocValues extends LuceneTestCase {

public void testByteMissingVsFieldCache() throws Exception {
@@ -315,14 +317,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}

// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/
writer.shutdown();

// compare
@@ -331,7 +330,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
}
ir.close();
dir.close();
@@ -382,14 +381,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}

// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/

// compare per-segment
DirectoryReader ir = writer.getReader();
@@ -397,7 +393,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader r = context.reader();
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
}
ir.close();

@@ -408,7 +404,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader ar = getOnlySegmentReader(ir);
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
assertEquals(ir.maxDoc(), expected, actual);
assertEquals(ir.maxDoc(), ar.getLiveDocs(), expected, actual);
ir.close();

writer.shutdown();
@@ -449,14 +445,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}

// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/

// merge some segments and ensure that at least one of them has more than
// 256 values

@@ -496,102 +489,149 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
}
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
private void assertEquals(int maxDoc, Bits liveDocs, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, liveDocs, DocValues.singleton(expected), DocValues.singleton(actual));
}

private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
private void assertEquals(int maxDoc, Bits liveDocs, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
// can be null for the segment if no docs actually had any SortedDocValues
// in this case FC.getDocTermsOrds returns EMPTY
if (actual == null) {
assertEquals(DocValues.EMPTY_SORTED_SET, expected);
return;
}
assertEquals(expected.getValueCount(), actual.getValueCount());
// compare ord lists FixedBitSet liveOrdsExpected = new FixedBitSet((int) expected.getValueCount());
FixedBitSet liveOrdsActual = new FixedBitSet((int) actual.getValueCount());
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
// compare values for all live docs:
for (int i = 0; i < maxDoc; i++) {
if (liveDocs != null && liveDocs.get(i) == false) {
// Don't check deleted docs
continue;
}
expected.setDocument(i);
actual.setDocument(i);
long expectedOrd;
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
assertEquals(expectedOrd, actual.nextOrd());
expected.lookupOrd(expectedOrd, expectedBytes);
long actualOrd = actual.nextOrd();
assertTrue(actualOrd != NO_MORE_ORDS);
actual.lookupOrd(actualOrd, actualBytes);
assertEquals(expectedBytes, actualBytes);
liveOrdsExpected.set((int) expectedOrd);
liveOrdsActual.set((int) actualOrd);
}
assertEquals(NO_MORE_ORDS, actual.nextOrd());
}
// Make sure both have same number of non-deleted values:
assertEquals(liveOrdsExpected.cardinality(), liveOrdsActual.cardinality());
// compare ord dictionary
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
for (long i = 0; i < expected.getValueCount(); i++) {
expected.lookupTerm(expectedBytes);
actual.lookupTerm(actualBytes);
int expectedOrd = 0;
int actualOrd = 0;
while (expectedOrd < expected.getValueCount()) {
expectedOrd = liveOrdsExpected.nextSetBit(expectedOrd);
if (expectedOrd == -1) {
break;
}
actualOrd = liveOrdsActual.nextSetBit(actualOrd);
expected.lookupOrd(expectedOrd, expectedBytes);
actual.lookupOrd(actualOrd, actualBytes);
assertEquals(expectedBytes, actualBytes);
expectedOrd++;
actualOrd++;
}
assertTrue(actualOrd == actual.getValueCount() || liveOrdsActual.nextSetBit(actualOrd) == -1);

// compare termsenum
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
assertEquals(expected.getValueCount(), expected.termsEnum(), liveOrdsExpected, actual.termsEnum(), liveOrdsActual);
}
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {

/** Does termsEnum.next() but then skips over deleted ords. */
private static BytesRef next(TermsEnum termsEnum, Bits liveOrds) throws IOException {
while (termsEnum.next() != null) {
if (liveOrds.get((int) termsEnum.ord())) {
return termsEnum.term();
}
}
return null;
}
/** Does termsEnum.seekCeil() but then skips over deleted ords. */
private static SeekStatus seekCeil(TermsEnum termsEnum, BytesRef term, Bits liveOrds) throws IOException {
SeekStatus status = termsEnum.seekCeil(term);
if (status == SeekStatus.END) {
return status;
} else {
if (liveOrds.get((int) termsEnum.ord()) == false) {
while (termsEnum.next() != null) {
if (liveOrds.get((int) termsEnum.ord())) {
return SeekStatus.NOT_FOUND;
}
}
return SeekStatus.END;
} else {
return status;
}
}
}
private void assertEquals(long numOrds, TermsEnum expected, Bits liveOrdsExpected, TermsEnum actual, Bits liveOrdsActual) throws Exception {
BytesRef ref;
// sequential next() through all terms
while ((ref = next(expected, liveOrdsExpected)) != null) {
assertEquals(ref, next(actual, liveOrdsActual));
while ((ref = expected.next()) != null) {
assertEquals(ref, actual.next());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(actual.next());
// sequential seekExact(ord) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
actual.seekExact(i);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(next(actual, liveOrdsActual));

// sequential seekExact(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
expected.seekExact(i);
assertTrue(actual.seekExact(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}

// sequential seekCeil(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
expected.seekExact(i);
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(ord)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(randomOrd);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}

// random seekExact(BytesRef)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
if (liveOrdsExpected.get((int) randomOrd) == false) {
continue;
}
expected.seekExact(randomOrd);
actual.seekExact(expected.term());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}

// random seekCeil(BytesRef)
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
SeekStatus expectedStatus = expected.seekCeil(target);
assertEquals(expectedStatus, actual.seekCeil(target));
SeekStatus expectedStatus = seekCeil(expected, target, liveOrdsExpected);
assertEquals(expectedStatus, seekCeil(actual, target, liveOrdsActual));
if (expectedStatus != SeekStatus.END) {
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
}
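The rewritten assertions above compare only the ords referenced by live documents on each side. A simplified, self-contained illustration of that strategy using java.util.BitSet (the sample data is made up, not the test's real setup):

```java
import java.util.BitSet;

// Collect the ords actually referenced by live documents on each side, then
// compare only those; deleted documents are skipped entirely.
public final class LiveOrdsSketch {
  public static void main(String[] args) {
    int[][] expectedOrdsPerDoc = {{0, 2}, {1}, {2}};
    int[][] actualOrdsPerDoc   = {{0, 2}, {1}, {2}};
    boolean[] live = {true, false, true}; // doc 1 is deleted

    BitSet liveOrdsExpected = new BitSet();
    BitSet liveOrdsActual = new BitSet();
    for (int doc = 0; doc < live.length; doc++) {
      if (!live[doc]) {
        continue; // deleted docs are not compared
      }
      for (int ord : expectedOrdsPerDoc[doc]) liveOrdsExpected.set(ord);
      for (int ord : actualOrdsPerDoc[doc]) liveOrdsActual.set(ord);
    }
    // Both sides must reference the same number of live ords.
    System.out.println(liveOrdsExpected.cardinality() == liveOrdsActual.cardinality());
  }
}
```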