mirror of https://github.com/apache/lucene.git

commit 18d2cfaf9c (parent d6968c3924)

LUCENE-5675: fix nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596512 13f79535-47bb-0310-9956-ffa450edef68
@@ -80,6 +80,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
     lastDocID = docID;
     lastPosition = -1;
+    lastVersion = -1;
   }
 
   @Override
@@ -230,15 +230,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
     return ((IDVersionTermState) currentFrame.state).idVersion;
   }
 
-  /** Returns false if the term deos not exist, or it exists but its version is too old (< minIDVersion). */
+  /** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
   public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
 
     if (fr.index == null) {
       throw new IllegalStateException("terms index was not loaded");
     }
 
-    // nocommit would be nice if somehow on doing deletes we didn't have to double-lookup again...
-
     if (term.bytes.length <= target.length) {
       term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
     }
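The javadoc above spells out the contract of the versioned lookup: seekExact(target, minIDVersion) answers false when the id is absent or when every indexed copy carries a version below minIDVersion. A minimal caller-side sketch, assuming the enum was obtained from the "id" field of this postings format; the variable names, the id value and the version constant are illustrative and not part of this patch:

    // Hypothetical optimistic-concurrency check before re-indexing a document.
    IDVersionSegmentTermsEnum idTermsEnum = (IDVersionSegmentTermsEnum) termsEnum;
    long newVersion = 42;
    if (idTermsEnum.seekExact(new BytesRef("user-123"), newVersion)) {
      // A copy with version >= newVersion is already indexed; skip this update.
    } else {
      // The id is missing or only older versions exist; safe to apply the update.
    }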
@@ -260,7 +258,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
 
     boolean changed = false;
 
-    // nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check?
+    // TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can check?
 
     if (currentFrame != staticFrame) {
@@ -380,7 +378,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
         }
         return false;
       }
-      System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
+      // System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
 
       if (DEBUG) {
         System.out.println(" target is same as current; return true");
@@ -220,11 +220,10 @@ final class IDVersionSegmentTermsEnumFrame {
   }
 
   void rewind() {
     // Force reload:
     fp = fpOrig;
     nextEnt = -1;
-    // nocommit move to BT too?
     //state.termBlockOrd = 0;
     hasTerms = hasTermsOrig;
     if (isFloor) {
       floorDataReader.rewind();
@@ -390,8 +389,7 @@ final class IDVersionSegmentTermsEnumFrame {
 
   public void decodeMetaData() throws IOException {
 
-    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
-    System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
+    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
 
     assert nextEnt >= 0;
@@ -136,9 +136,7 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
       }
 
       // verify
-      if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
-        CodecUtil.checksumEntireFile(indexIn);
-      }
+      CodecUtil.checksumEntireFile(indexIn);
 
       // Have PostingsReader init itself
       postingsReader.init(in);
@@ -167,15 +165,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
         final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
-        final int longsSize = version >= VersionBlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
+        final int longsSize = in.readVInt();
 
-        BytesRef minTerm, maxTerm;
-        if (version >= VersionBlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
-          minTerm = readBytesRef(in);
-          maxTerm = readBytesRef(in);
-        } else {
-          minTerm = maxTerm = null;
-        }
+        BytesRef minTerm = readBytesRef(in);
+        BytesRef maxTerm = readBytesRef(in);
         if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
           throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
         }
@@ -217,9 +210,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
     int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_CODEC_NAME,
                                         VersionBlockTreeTermsWriter.VERSION_START,
                                         VersionBlockTreeTermsWriter.VERSION_CURRENT);
-    if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
-      dirOffset = input.readLong();
-    }
     return version;
   }
 
@@ -228,22 +218,14 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
     int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
                                         VersionBlockTreeTermsWriter.VERSION_START,
                                         VersionBlockTreeTermsWriter.VERSION_CURRENT);
-    if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
-      indexDirOffset = input.readLong();
-    }
     return version;
   }
 
   /** Seek {@code input} to the directory offset. */
   private void seekDir(IndexInput input, long dirOffset)
       throws IOException {
-    if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
-      input.seek(input.length() - CodecUtil.footerLength() - 8);
-      dirOffset = input.readLong();
-    } else if (version >= VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
-      input.seek(input.length() - 8);
-      dirOffset = input.readLong();
-    }
+    input.seek(input.length() - CodecUtil.footerLength() - 8);
+    dirOffset = input.readLong();
     input.seek(dirOffset);
   }
 
@@ -306,12 +288,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
 
   @Override
   public void checkIntegrity() throws IOException {
-    if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
-      // term dictionary
-      CodecUtil.checksumEntireFile(in);
+    // term dictionary
+    CodecUtil.checksumEntireFile(in);
 
-      // postings
-      postingsReader.checkIntegrity();
-    }
+    // postings
+    postingsReader.checkIntegrity();
   }
 }
@@ -84,118 +84,19 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 
 /**
- * Block-based terms index and dictionary writer.
- * <p>
- * Writes terms dict and index, block-encoding (column
- * stride) each term's metadata for each set of terms
- * between two index terms.
- * <p>
- * Files:
- * <ul>
- *   <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
- *   <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
- * </ul>
- * <p>
- * <a name="Termdictionary" id="Termdictionary"></a>
- * <h3>Term Dictionary</h3>
  * This is just like {@link BlockTreeTermsWriter}, except it also stores a version per term, and adds a method to its TermsEnum
  * implementation to seekExact only if the version is >= the specified version. The version is added to the terms index to avoid seeking if
  * no term in the block has a high enough version. The term blocks file is .tiv and the terms index extension is .tipv.
- *
- * <p>The .tim file contains the list of terms in each
- * field along with per-term statistics (such as docfreq)
- * and per-term metadata (typically pointers to the postings list
- * for that term in the inverted index).
- * </p>
- *
- * <p>The .tim is arranged in blocks: with blocks containing
- * a variable number of entries (by default 25-48), where
- * each entry is either a term or a reference to a
- * sub-block.</p>
- *
- * <p>NOTE: The term dictionary can plug into different postings implementations:
- * the postings writer/reader are actually responsible for encoding
- * and decoding the Postings Metadata and Term Metadata sections.</p>
- *
- * <ul>
- *   <li>TermsDict (.tim) --> Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
- *       FieldSummary, DirOffset, Footer</li>
- *   <li>NodeBlock --> (OuterNode | InnerNode)</li>
- *   <li>OuterNode --> EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata</i>><sup>EntryCount</sup></li>
- *   <li>InnerNode --> EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ? ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata ? </i>><sup>EntryCount</sup></li>
- *   <li>TermStats --> DocFreq, TotalTermFreq </li>
- *   <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
- *       SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm><sup>NumFields</sup></li>
- *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- *   <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- *   <li>MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]</li>
- *   <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
- *       FieldNumber,RootCodeLength,DocCount,LongsSize --> {@link DataOutput#writeVInt VInt}</li>
- *   <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq -->
- *       {@link DataOutput#writeVLong VLong}</li>
- *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- *   <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
- *       for the BlockTree implementation.</li>
- *   <li>DirOffset is a pointer to the FieldSummary section.</li>
- *   <li>DocFreq is the count of documents which contain the term.</li>
- *   <li>TotalTermFreq is the total number of occurrences of the term. This is encoded
- *       as the difference between the total number of occurrences and the DocFreq.</li>
- *   <li>FieldNumber is the fields number from {@link FieldInfos}. (.fnm)</li>
- *   <li>NumTerms is the number of unique terms for the field.</li>
- *   <li>RootCode points to the root block for the field.</li>
- *   <li>SumDocFreq is the total number of postings, the number of term-document pairs across
- *       the entire field.</li>
- *   <li>DocCount is the number of documents that have at least one posting for this field.</li>
- *   <li>LongsSize records how many long values the postings writer/reader record per term
- *       (e.g., to hold freq/prox/doc file offsets).
- *   <li>MinTerm, MaxTerm are the lowest and highest term in this field.</li>
- *   <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
- *       these contain arbitrary per-file data (such as parameters or versioning information)
- *       and per-term data (such as pointers to inverted files).</li>
- *   <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points
- *       to child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted </li>
- * </ul>
- * <a name="Termindex" id="Termindex"></a>
- * <h3>Term Index</h3>
- * <p>The .tip file contains an index into the term dictionary, so that it can be
- * accessed randomly. The index is also used to determine
- * when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
- * <ul>
- *   <li>TermsIndex (.tip) --> Header, FSTIndex<sup>NumFields</sup>
- *       <IndexStartFP><sup>NumFields</sup>, DirOffset, Footer</li>
- *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- *   <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- *   <li>IndexStartFP --> {@link DataOutput#writeVLong VLong}</li>
- *   <!-- TODO: better describe FST output here -->
- *   <li>FSTIndex --> {@link FST FST<byte[]>}</li>
- *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- *   <li>The .tip file contains a separate FST for each
- *       field. The FST maps a term prefix to the on-disk
- *       block that holds all terms starting with that
- *       prefix. Each field's IndexStartFP points to its
- *       FST.</li>
- *   <li>DirOffset is a pointer to the start of the IndexStartFPs
- *       for all fields</li>
- *   <li>It's possible that an on-disk block would contain
- *       too many terms (more than the allowed maximum
- *       (default: 48)). When this happens, the block is
- *       sub-divided into new blocks (called "floor
- *       blocks"), and then the output in the FST for the
- *       block's prefix encodes the leading byte of each
- *       sub-block, and its file pointer.
- * </ul>
- *
- * @see BlockTreeTermsReader
  * @lucene.experimental
  */
-// nocommit fix jdocs
 
 final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 
   private static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;
 
   static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
-                                                                          PositiveIntOutputs.getSingleton());
+                                                                            PositiveIntOutputs.getSingleton());
 
   static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();
 
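The trimmed javadoc above states that this writer stores a version for every term and uses the .tiv/.tipv file extensions. A short indexing sketch, patterned on the makeIDField helper and writer setup in the test further down in this patch; dir, id and version are assumed to exist, and the rest of the wiring is illustrative rather than prescribed by the patch:

    // Force the ID/version postings format, as the test below does.
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    IndexWriter w = new IndexWriter(dir, iwc);

    // The version rides on the token payload as an 8-byte value.
    BytesRef payload = new BytesRef(8);
    payload.length = 8;
    IDVersionPostingsFormat.longToBytes(version, payload);
    Document doc = new Document();
    doc.add(new StringAndPayloadField("id", id, payload));
    w.addDocument(doc);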
@@ -224,25 +125,11 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
   /** Initial terms format. */
   public static final int VERSION_START = 0;
 
-  // nocommit nuke all these old versions
-
-  /** Append-only */
-  public static final int VERSION_APPEND_ONLY = 1;
-
-  /** Meta data as array */
-  public static final int VERSION_META_ARRAY = 2;
-
-  /** checksums */
-  public static final int VERSION_CHECKSUM = 3;
-
-  /** min/max term */
-  public static final int VERSION_MIN_MAX_TERMS = 4;
-
   /** Current terms format. */
-  public static final int VERSION_CURRENT = VERSION_MIN_MAX_TERMS;
+  public static final int VERSION_CURRENT = VERSION_START;
 
   /** Extension of terms index file */
-  static final String TERMS_INDEX_EXTENSION = "tip";
+  static final String TERMS_INDEX_EXTENSION = "tipv";
   final static String TERMS_INDEX_CODEC_NAME = "VERSION_BLOCK_TREE_TERMS_INDEX";
 
   private final IndexOutput out;
@@ -297,7 +184,6 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
                                     int maxItemsInBlock)
     throws IOException
   {
-    System.out.println("VBTTW minItemsInBlock=" + minItemsInBlock + " maxItemsInBlock=" + maxItemsInBlock);
     if (minItemsInBlock <= 1) {
       throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
     }
@@ -626,8 +512,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
     // following floor blocks:
 
     void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
-      // nocommit why can't we do floor blocks for root frame?
-      if (prefixLength == 0 || count <= maxItemsInBlock) {
+      if (count <= maxItemsInBlock) {
         // Easy case: not floor block. Eg, prefix is "foo",
         // and we found 30 terms/sub-blocks starting w/ that
         // prefix, and minItemsInBlock <= 30 <=
@@ -645,7 +530,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
       // TODO: we could store min & max suffix start byte
       // in each block, to make floor blocks authoritative
 
-      //if (DEBUG) {
+      if (DEBUG) {
         final BytesRef prefix = new BytesRef(prefixLength);
         for(int m=0;m<prefixLength;m++) {
           prefix.bytes[m] = (byte) prevTerm.ints[m];
@@ -653,7 +538,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
         prefix.length = prefixLength;
         //System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
         System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
-      //}
+      }
       //System.out.println("\nwbs count=" + count);
 
       final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];
@@ -874,9 +759,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
       // Write block header:
       out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
 
-      // if (DEBUG) {
-      System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
-      // }
+      if (DEBUG) {
+        System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
+      }
 
       // 1st pass: pack term suffix bytes into byte[] blob
       // TODO: cutover to bulk int codec... simple64?
@@ -920,12 +805,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
           BlockTermState state = term.state;
           maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
           final int suffix = term.term.length - prefixLength;
-          // if (DEBUG) {
+          if (DEBUG) {
             BytesRef suffixBytes = new BytesRef(suffix);
             System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
             suffixBytes.length = suffix;
             System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
-          // }
+          }
           // For leaf block we write suffix straight
           suffixWriter.writeVInt(suffix);
           suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
@@ -957,12 +842,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
           BlockTermState state = term.state;
           maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
           final int suffix = term.term.length - prefixLength;
-          // if (DEBUG) {
+          if (DEBUG) {
            BytesRef suffixBytes = new BytesRef(suffix);
            System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
            suffixBytes.length = suffix;
            System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
-          // }
+          }
           // For non-leaf block we borrow 1 bit to record
           // if entry is term or sub-block
           suffixWriter.writeVInt(suffix<<1);
@@ -1007,12 +892,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
           suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
           assert block.fp < startFP;
 
-          // if (DEBUG) {
+          if (DEBUG) {
            BytesRef suffixBytes = new BytesRef(suffix);
            System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
            suffixBytes.length = suffix;
            System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
-          // }
+          }
 
           suffixWriter.writeVLong(startFP - block.fp);
           subIndices.add(block.index);
@@ -28,7 +28,7 @@ import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.util.BytesRef;
 
-// nocommit can we take a BytesRef token instead?
+// TODO: can we take a BytesRef token instead?
 
 /** Produces a single String token from the provided value, with the provided payload. */
 class StringAndPayloadField extends Field {
@@ -49,6 +49,7 @@ import org.apache.lucene.index.PerThreadPKLookup;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -90,16 +91,16 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     String next();
   }
 
-  // nocommit make a similar test for BT, w/ varied IDs:
+  // TODO make a similar test for BT, w/ varied IDs:
 
   public void testRandom() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
-    // nocommit randomize the block sizes:
-    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
-    // nocommit put back
-    //RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-    IndexWriter w = new IndexWriter(dir, iwc);
+    int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
+    int maxItemsInBlock = 2*(minItemsInBlock-1) + random().nextInt(50);
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    //IndexWriter w = new IndexWriter(dir, iwc);
     int numDocs = atLeast(1000);
     Map<String,Long> idValues = new HashMap<String,Long>();
     int docUpto = 0;
@@ -210,9 +211,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
     }
 
+    List<String> idsList = new ArrayList<>();
+
     long version = 0;
     while (docUpto < numDocs) {
-      // nocommit add deletes in
       String idValue = idPrefix + ids.next();
       if (idValues.containsKey(idValue)) {
         continue;
@@ -229,11 +231,38 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       Document doc = new Document();
       doc.add(makeIDField(idValue, version));
       w.addDocument(doc);
+      idsList.add(idValue);
+
+      if (idsList.size() > 0 && random().nextInt(7) == 5) {
+        // Randomly delete or update a previous ID
+        idValue = idsList.get(random().nextInt(idsList.size()));
+        if (random().nextBoolean()) {
+          if (useMonotonicVersion) {
+            version += TestUtil.nextInt(random(), 1, 10);
+          } else {
+            version = random().nextLong() & 0x7fffffffffffffffL;
+          }
+          doc = new Document();
+          doc.add(makeIDField(idValue, version));
+          if (VERBOSE) {
+            System.out.println("  update " + idValue + " -> " + version);
+          }
+          w.updateDocument(new Term("id", idValue), doc);
+          idValues.put(idValue, version);
+        } else {
+          if (VERBOSE) {
+            System.out.println("  delete " + idValue);
+          }
+          w.deleteDocuments(new Term("id", idValue));
+          idValues.remove(idValue);
+        }
+      }
+
       docUpto++;
     }
 
-    //IndexReader r = w.getReader();
-    IndexReader r = DirectoryReader.open(w, true);
+    IndexReader r = w.getReader();
+    //IndexReader r = DirectoryReader.open(w, true);
     PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
 
     List<Map.Entry<String,Long>> idValuesList = new ArrayList<>(idValues.entrySet());
@@ -242,7 +271,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       String idValue;
 
       if (random().nextBoolean()) {
-        idValue = idValuesList.get(random().nextInt(numDocs)).getKey();
+        idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
       } else if (random().nextBoolean()) {
         idValue = ids.next();
       } else {
@@ -318,14 +347,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     payload.length = 8;
     IDVersionPostingsFormat.longToBytes(version, payload);
     return new StringAndPayloadField("id", id, payload);
-
-    /*
-    Field field = newTextField("id", "", Field.Store.NO);
-    Token token = new Token(id, 0, id.length());
-    token.setPayload(payload);
-    field.setTokenStream(new CannedTokenStream(token));
-    return field;
-    */
   }
 
   public void testMoreThanOneDocPerIDOneSegment() throws Exception {
@@ -353,6 +374,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    iwc.setMergePolicy(new TieredMergePolicy());
     MergeScheduler ms = iwc.getMergeScheduler();
     if (ms instanceof ConcurrentMergeScheduler) {
       iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
@@ -362,7 +384,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
         }
       });
     }
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    IndexWriter w = new IndexWriter(dir, iwc);
     Document doc = new Document();
     doc.add(makeIDField("id", 17));
     w.addDocument(doc);
@@ -380,7 +402,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       // expected
       assertTrue(ioe.getCause() instanceof IllegalArgumentException);
     }
-    w.w.close();
+    w.close();
     dir.close();
   }
 
@@ -37,7 +37,7 @@ import org.apache.lucene.util.Bits;
  * time.
  * @lucene.experimental */
 
-// nocommit mv under blocktree? but ... it's used by others (e.g. block terms)
+// TODO: maybe move under blocktree? but it's used by other terms dicts (e.g. Block)
 
 // TODO: find a better name; this defines the API that the
 // terms dict impls use to talk to a postings impl.
@@ -1640,22 +1640,15 @@ public class CheckIndex {
         // Again, with the one doc deleted:
         checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);
 
-        // Only agg stats if the doc is live:
-        final boolean doStats = liveDocs == null || liveDocs.get(j);
-
-        if (doStats == false) {
-          // nocommit is it OK to stop verifying deleted docs?
+        if (liveDocs != null && liveDocs.get(j) == false) {
+          // Only check live docs
           continue;
         }
 
-        if (doStats) {
-          status.docCount++;
-        }
+        status.docCount++;
 
         for(String field : tfv) {
-          if (doStats) {
-            status.totVectors++;
-          }
+          status.totVectors++;
 
           // Make sure FieldInfo thinks this field is vector'd:
           final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
@@ -94,6 +94,8 @@ final class DefaultIndexingChain extends DocConsumer {
     // aborting on any exception from this method
 
+    int numDocs = state.segmentInfo.getDocCount();
 
+    // TODO: we could set liveDocs earlier and then fix DVs to also not write deleted docs:
     writeNorms(state);
     writeDocValues(state);
 
@@ -34,7 +34,6 @@ final class FreqProxTermsWriter extends TermsHash {
   }
 
   private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
-    System.out.println("applyDeletes segUpdates=" + state.segUpdates);
 
     // Process any pending Term deletes for this newly
     // flushed segment:
@@ -108,8 +107,6 @@ final class FreqProxTermsWriter extends TermsHash {
       fields.setLiveDocs(state.liveDocs);
     }
 
-    System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
-
     FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
     boolean success = false;
     try {
@@ -1696,7 +1696,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
     if (doWait) {
       synchronized(this) {
         while(true) {
-
           if (hitOOM) {
             throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
           }
@@ -17,8 +17,7 @@ package org.apache.lucene.uninverting;
  * limitations under the License.
  */
 
-import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -47,14 +46,17 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
 public class TestFieldCacheVsDocValues extends LuceneTestCase {
 
   public void testByteMissingVsFieldCache() throws Exception {
@@ -315,14 +317,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
 
     // delete some docs
-    // nocommit hmmm what to do
-    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
-    */
     writer.shutdown();
 
     // compare
@@ -331,7 +330,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
       AtomicReader r = context.reader();
       SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
       SortedDocValues actual = r.getSortedDocValues("dv");
-      assertEquals(r.maxDoc(), expected, actual);
+      assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
     }
     ir.close();
     dir.close();
@@ -382,14 +381,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
 
     // delete some docs
-    // nocommit hmmm what to do
-    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
-    */
 
     // compare per-segment
     DirectoryReader ir = writer.getReader();
@@ -397,7 +393,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
       AtomicReader r = context.reader();
       SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
       SortedSetDocValues actual = r.getSortedSetDocValues("dv");
-      assertEquals(r.maxDoc(), expected, actual);
+      assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
     }
     ir.close();
 
@@ -408,7 +404,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     AtomicReader ar = getOnlySegmentReader(ir);
     SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
     SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
-    assertEquals(ir.maxDoc(), expected, actual);
+    assertEquals(ir.maxDoc(), ar.getLiveDocs(), expected, actual);
     ir.close();
 
     writer.shutdown();
@@ -449,14 +445,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
 
     // delete some docs
-    // nocommit hmmm what to do
-    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
-    */
 
     // merge some segments and ensure that at least one of them has more than
     // 256 values
@@ -496,102 +489,149 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
   }
 
-  private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
-    assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
+  private void assertEquals(int maxDoc, Bits liveDocs, SortedDocValues expected, SortedDocValues actual) throws Exception {
+    assertEquals(maxDoc, liveDocs, DocValues.singleton(expected), DocValues.singleton(actual));
   }
 
-  private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
+  private void assertEquals(int maxDoc, Bits liveDocs, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
     // can be null for the segment if no docs actually had any SortedDocValues
     // in this case FC.getDocTermsOrds returns EMPTY
     if (actual == null) {
       assertEquals(DocValues.EMPTY_SORTED_SET, expected);
       return;
     }
     assertEquals(expected.getValueCount(), actual.getValueCount());
-    // compare ord lists
+
+    FixedBitSet liveOrdsExpected = new FixedBitSet((int) expected.getValueCount());
+    FixedBitSet liveOrdsActual = new FixedBitSet((int) actual.getValueCount());
+
+    BytesRef expectedBytes = new BytesRef();
+    BytesRef actualBytes = new BytesRef();
+
+    // compare values for all live docs:
     for (int i = 0; i < maxDoc; i++) {
+      if (liveDocs != null && liveDocs.get(i) == false) {
+        // Don't check deleted docs
+        continue;
+      }
       expected.setDocument(i);
       actual.setDocument(i);
       long expectedOrd;
       while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
-        assertEquals(expectedOrd, actual.nextOrd());
+        expected.lookupOrd(expectedOrd, expectedBytes);
+        long actualOrd = actual.nextOrd();
+        assertTrue(actualOrd != NO_MORE_ORDS);
+        actual.lookupOrd(actualOrd, actualBytes);
+        assertEquals(expectedBytes, actualBytes);
+        liveOrdsExpected.set((int) expectedOrd);
+        liveOrdsActual.set((int) actualOrd);
       }
 
       assertEquals(NO_MORE_ORDS, actual.nextOrd());
     }
 
+    // Make sure both have same number of non-deleted values:
+    assertEquals(liveOrdsExpected.cardinality(), liveOrdsActual.cardinality());
+
     // compare ord dictionary
-    BytesRef expectedBytes = new BytesRef();
-    BytesRef actualBytes = new BytesRef();
-    for (long i = 0; i < expected.getValueCount(); i++) {
-      expected.lookupTerm(expectedBytes);
-      actual.lookupTerm(actualBytes);
+    int expectedOrd = 0;
+    int actualOrd = 0;
+    while (expectedOrd < expected.getValueCount()) {
+      expectedOrd = liveOrdsExpected.nextSetBit(expectedOrd);
+      if (expectedOrd == -1) {
+        break;
+      }
+      actualOrd = liveOrdsActual.nextSetBit(actualOrd);
+      expected.lookupOrd(expectedOrd, expectedBytes);
+      actual.lookupOrd(actualOrd, actualBytes);
       assertEquals(expectedBytes, actualBytes);
+      expectedOrd++;
+      actualOrd++;
     }
+    assertTrue(actualOrd == actual.getValueCount() || liveOrdsActual.nextSetBit(actualOrd) == -1);
 
     // compare termsenum
-    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
+    assertEquals(expected.getValueCount(), expected.termsEnum(), liveOrdsExpected, actual.termsEnum(), liveOrdsActual);
   }
 
-  private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
+  /** Does termsEnum.next() but then skips over deleted ords. */
+  private static BytesRef next(TermsEnum termsEnum, Bits liveOrds) throws IOException {
+    while (termsEnum.next() != null) {
+      if (liveOrds.get((int) termsEnum.ord())) {
+        return termsEnum.term();
+      }
+    }
+    return null;
+  }
+
+  /** Does termsEnum.seekCeil() but then skips over deleted ords. */
+  private static SeekStatus seekCeil(TermsEnum termsEnum, BytesRef term, Bits liveOrds) throws IOException {
+    SeekStatus status = termsEnum.seekCeil(term);
+    if (status == SeekStatus.END) {
+      return status;
+    } else {
+      if (liveOrds.get((int) termsEnum.ord()) == false) {
+        while (termsEnum.next() != null) {
+          if (liveOrds.get((int) termsEnum.ord())) {
+            return SeekStatus.NOT_FOUND;
+          }
+        }
+        return SeekStatus.END;
+      } else {
+        return status;
+      }
+    }
+  }
+
+  private void assertEquals(long numOrds, TermsEnum expected, Bits liveOrdsExpected, TermsEnum actual, Bits liveOrdsActual) throws Exception {
     BytesRef ref;
 
     // sequential next() through all terms
-    while ((ref = expected.next()) != null) {
-      assertEquals(ref, actual.next());
-      assertEquals(expected.ord(), actual.ord());
-      assertEquals(expected.term(), actual.term());
-    }
-    assertNull(actual.next());
-
-    // sequential seekExact(ord) through all terms
-    for (long i = 0; i < numOrds; i++) {
-      expected.seekExact(i);
-      actual.seekExact(i);
-      assertEquals(expected.ord(), actual.ord());
+    while ((ref = next(expected, liveOrdsExpected)) != null) {
+      assertEquals(ref, next(actual, liveOrdsActual));
       assertEquals(expected.term(), actual.term());
     }
+    assertNull(next(actual, liveOrdsActual));
 
     // sequential seekExact(BytesRef) through all terms
     for (long i = 0; i < numOrds; i++) {
+      if (liveOrdsExpected.get((int) i) == false) {
+        continue;
+      }
       expected.seekExact(i);
       assertTrue(actual.seekExact(expected.term()));
       assertEquals(expected.ord(), actual.ord());
       assertEquals(expected.term(), actual.term());
     }
 
     // sequential seekCeil(BytesRef) through all terms
     for (long i = 0; i < numOrds; i++) {
+      if (liveOrdsExpected.get((int) i) == false) {
+        continue;
+      }
       expected.seekExact(i);
       assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
       assertEquals(expected.ord(), actual.ord());
       assertEquals(expected.term(), actual.term());
     }
 
-    // random seekExact(ord)
-    for (long i = 0; i < numOrds; i++) {
-      long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
-      expected.seekExact(randomOrd);
-      actual.seekExact(randomOrd);
-      assertEquals(expected.ord(), actual.ord());
-      assertEquals(expected.term(), actual.term());
-    }
-
     // random seekExact(BytesRef)
     for (long i = 0; i < numOrds; i++) {
       long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
+      if (liveOrdsExpected.get((int) randomOrd) == false) {
+        continue;
+      }
       expected.seekExact(randomOrd);
       actual.seekExact(expected.term());
       assertEquals(expected.ord(), actual.ord());
       assertEquals(expected.term(), actual.term());
     }
 
     // random seekCeil(BytesRef)
     for (long i = 0; i < numOrds; i++) {
+      if (liveOrdsExpected.get((int) i) == false) {
+        continue;
+      }
       BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
-      SeekStatus expectedStatus = expected.seekCeil(target);
-      assertEquals(expectedStatus, actual.seekCeil(target));
+      SeekStatus expectedStatus = seekCeil(expected, target, liveOrdsExpected);
+      assertEquals(expectedStatus, seekCeil(actual, target, liveOrdsActual));
       if (expectedStatus != SeekStatus.END) {
         assertEquals(expected.ord(), actual.ord());
         assertEquals(expected.term(), actual.term());
       }
     }
   }