From c2f37e0de2d94fbe4c07f3d0aecfea250788eebb Mon Sep 17 00:00:00 2001
From: Robert Muir <rmuir@apache.org>
Date: Mon, 19 Aug 2013 19:07:49 +0000
Subject: [PATCH] javadocs/cleanups

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515563 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/codecs/DocValuesProducer.java      |  2 +
 .../lucene45/Lucene45DocValuesConsumer.java   |  5 +-
 .../lucene45/Lucene45DocValuesFormat.java     | 83 ++++++++++++-------
 .../lucene45/Lucene45DocValuesProducer.java   | 31 ++++++-
 .../lucene/index/BinaryDocValuesWriter.java   |  4 +-
 .../lucene/index/NumericDocValuesWriter.java  |  4 +-
 .../asserting/AssertingNormsFormat.java       |  1 -
 .../org/apache/lucene/codecs/package.html     | 25 ++++++
 .../index/BaseDocValuesFormatTestCase.java    |  2 +-
 .../org/apache/lucene/util/_TestUtil.java     | 11 ++-
 10 files changed, 120 insertions(+), 48 deletions(-)
 create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/package.html
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
index 04778aa1201..05dfcf1b0dd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@@ -75,6 +75,7 @@ public abstract class DocValuesProducer implements Closeable {
     final SortedDocValues in;
     final int maxDoc;
     
+    /** Creates a {@link Bits} returning true if the document has a value */
     public SortedDocsWithField(SortedDocValues in, int maxDoc) {
       this.in = in;
       this.maxDoc = maxDoc;
@@ -102,6 +103,7 @@ public abstract class DocValuesProducer implements Closeable {
     final SortedSetDocValues in;
     final int maxDoc;
     
+    /** Creates a {@link Bits} returning true if the document has a value */
     public SortedSetDocsWithField(SortedSetDocValues in, int maxDoc) {
       this.in = in;
       this.maxDoc = maxDoc;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java
index 21ee03075f5..e5afdf70abb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java
@@ -17,6 +17,7 @@ package org.apache.lucene.codecs.lucene45;
  * limitations under the License.
  */
 
+import java.io.Closeable; // javadocs
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -37,7 +38,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /** writer for {@link Lucene45DocValuesFormat} */
-public class Lucene45DocValuesConsumer extends DocValuesConsumer {
+public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Closeable {
 
   static final int BLOCK_SIZE = 16384;
   static final int ADDRESS_INTERVAL = 16;
@@ -59,6 +60,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer {
   final IndexOutput data, meta;
   final int maxDoc;
   
+  /** expert: Creates a new writer */
   public Lucene45DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     boolean success = false;
     try {
@@ -273,6 +275,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer {
     }
   }
   
+  /** expert: writes a value dictionary for a sorted/sortedset field */
   protected void addTermsDict(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
     // first check if its a "fixed-length" terms dict
     int minLength = Integer.MAX_VALUE;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java
index 68a44370b30..3f3387ae244 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java
@@ -60,19 +60,23 @@ import org.apache.lucene.util.packed.PackedInts;
  *        for each document. The addresses are written in blocks of 16k, with the current absolute
  *        start for the block, and the average (expected) delta per entry. For each document the 
  *        deviation from the delta (actual - expected) is written.
- *    <li>Prefix-compressed Binary: nocommit
+ *    <li>Prefix-compressed Binary: values are written in chunks of 16, with the first value written
+ *        completely and other values sharing prefixes. chunk addresses are written in blocks of 16k,
+ *        with the current absolute start for the block, and the average (expected) delta per entry. 
+ *        For each chunk the deviation from the delta (actual - expected) is written.
  * </ul>
  * <p>
  * {@link DocValuesType#SORTED SORTED}:
  * <ul>
- *    <li>Sorted: an FST mapping deduplicated terms to ordinals is written, along with the per-document
- *        ordinals written using one of the numeric strategies above.
+ *    <li>Sorted: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary, 
+ *        along with the per-document ordinals written using one of the numeric strategies above.
  * </ul>
  * <p>
  * {@link DocValuesType#SORTED_SET SORTED_SET}:
  * <ul>
- *    <li>SortedSet: an FST mapping deduplicated terms to ordinals is written, along with the per-document
- *        ordinal list written using one of the binary strategies above.  
+ *    <li>SortedSet: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary, 
+ *        an ordinal list and per-document index into this list are written using the numeric strategies 
+ *        above. 
  * </ul>
  * <p>
  * Files:
@@ -85,25 +89,35 @@ import org.apache.lucene.util.packed.PackedInts;
  *   <p>The DocValues metadata or .dvm file.</p>
  *   <p>For DocValues field, this stores metadata, such as the offset into the 
  *      DocValues data (.dvd)</p>
- *   <p>DocValues metadata (.dvm) --&gt; Header,&lt;FieldNumber,EntryType,Entry&gt;<sup>NumFields</sup></p>
+ *   <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup></p>
  *   <ul>
- *     <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry</li>
- *     <li>NumericEntry --&gt; DataOffset,NumericCompressionType,PackedVersion</li>
- *     <li>BinaryEntry --&gt; DataOffset,DataLength,MinLength,MaxLength,PackedVersion?,BlockSize?</li>
- *     <li>SortedEntry --&gt; DataOffset,ValueCount</li>
+ *     <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li>
+ *     <li>NumericEntry --&gt; GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
+ *     <li>GCDNumericEntry --&gt; NumericHeader,MinValue,GCD</li>
+ *     <li>TableNumericEntry --&gt; NumericHeader,TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup></li>
+ *     <li>DeltaNumericEntry --&gt; NumericHeader</li>
+ *     <li>NumericHeader --&gt; FieldNumber,EntryType,NumericType,MissingOffset,PackedVersion,DataOffset,Count,BlockSize</li>
+ *     <li>BinaryEntry --&gt; FixedBinaryEntry | VariableBinaryEntry | PrefixBinaryEntry</li>
+ *     <li>FixedBinaryEntry --&gt; BinaryHeader</li>
+ *     <li>VariableBinaryEntry --&gt; BinaryHeader,AddressOffset,PackedVersion,BlockSize</li>
+ *     <li>PrefixBinaryEntry --&gt; BinaryHeader,AddressInterval,AddressOffset,PackedVersion,BlockSize</li>
+ *     <li>BinaryHeader --&gt; FieldNumber,EntryType,BinaryType,MissingOffset,MinLength,MaxLength,DataOffset</li>
+ *     <li>SortedEntry --&gt; FieldNumber,EntryType,BinaryEntry,NumericEntry</li>
+ *     <li>SortedSetEntry --&gt; EntryType,BinaryEntry,NumericEntry,NumericEntry</li>
  *     <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --&gt; {@link DataOutput#writeVInt VInt}</li>
- *     <li>DataOffset,DataLength --&gt; {@link DataOutput#writeLong Int64}</li>
  *     <li>EntryType,CompressionType --&gt; {@link DataOutput#writeByte Byte}</li>
  *     <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ *     <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --&gt; {@link DataOutput#writeLong Int64}</li>
+ *     <li>TableSize --&gt; {@link DataOutput#writeVInt vInt}</li>
  *   </ul>
- *   <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
+ *   <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
  *      and an ordinary NumericEntry for the document-to-ord metadata.</p>
- *   <p>SortedSet fields have two entries: a SortedEntry with the FST metadata,
- *      and an ordinary BinaryEntry for the document-to-ord-list metadata.</p>
+ *   <p>SortedSet fields have three entries: a BinaryEntry with the value metadata,
+ *      and two NumericEntries for the document-to-ord-index and ordinal list metadata.</p>
  *   <p>FieldNumber of -1 indicates the end of metadata.</p>
- *   <p>EntryType is a 0 (NumericEntry), 1 (BinaryEntry, or 2 (SortedEntry)</p>
+ *   <p>EntryType is a 0 (NumericEntry) or 1 (BinaryEntry)</p>
  *   <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
- *   <p>NumericCompressionType indicates how Numeric values will be compressed:
+ *   <p>NumericType indicates how Numeric values will be compressed:
  *      <ul>
  *         <li>0 --&gt; delta-compressed. For each block of 16k integers, every integer is delta-encoded
  *             from the minimum value within the block. 
@@ -112,10 +126,18 @@ import org.apache.lucene.util.packed.PackedInts;
  *         <li>2 --&gt; table-compressed. When the number of unique numeric values is small and it would save space,
  *             a lookup table of unique values is written, followed by the ordinal for each document.
  *      </ul>
+ *   <p>BinaryType indicates how Binary values will be stored:
+ *      <ul>
+ *         <li>0 --&gt; fixed-width. All values have the same length, addressing by multiplication. 
+ *         <li>1 --&gt, variable-width. An address for each value is stored.
+ *         <li>2 --&gt; prefix-compressed. An address to the start of every interval'th value is stored.
+ *      </ul>
  *   <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
  *      If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
  *      Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
  *      is written for the addresses.
+ *   <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
+ *      If its -1, then there are no missing values.
  *   <li><a name="dvd" id="dvd"></a>
  *   <p>The DocValues data or .dvd file.</p>
  *   <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
@@ -125,21 +147,18 @@ import org.apache.lucene.util.packed.PackedInts;
  *     <li>BinaryData --&gt;  {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
  *     <li>SortedData --&gt; {@link FST FST&lt;Int64&gt;}</li>
  *     <li>DeltaCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
- *     <li>TableCompressedNumerics --&gt; TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li>
- *     <li>GCDCompressedNumerics --&gt; MinValue,GCD,{@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
+ *     <li>TableCompressedNumerics --&gt; {@link PackedInts PackedInts}</li>
+ *     <li>GCDCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
  *     <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
- *     <li>TableSize --&gt; {@link DataOutput#writeVInt vInt}</li>
- *     <li>MinValue --&gt; {@link DataOutput#writeLong Int64}</li>
- *     <li>GCD --&gt; {@link DataOutput#writeLong Int64}</li>
  *   </ul>
  *   <p>SortedSet entries store the list of ordinals in their BinaryData as a
  *      sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
  * </ol>
  * @lucene.experimental
  */
-// nocommit: docs are incomplete
 public final class Lucene45DocValuesFormat extends DocValuesFormat {
 
+  /** Sole Constructor */
   public Lucene45DocValuesFormat() {
     super("Lucene45");
   }
@@ -154,14 +173,14 @@ public final class Lucene45DocValuesFormat extends DocValuesFormat {
     return new Lucene45DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
   }
   
-  public static final String DATA_CODEC = "Lucene45DocValuesData";
-  public static final String DATA_EXTENSION = "dvd";
-  public static final String META_CODEC = "Lucene45ValuesMetadata";
-  public static final String META_EXTENSION = "dvm";
-  public static final int VERSION_START = 0;
-  public static final int VERSION_CURRENT = VERSION_START;
-  public static final byte NUMERIC = 0;
-  public static final byte BINARY = 1;
-  public static final byte SORTED = 2;
-  public static final byte SORTED_SET = 3;
+  static final String DATA_CODEC = "Lucene45DocValuesData";
+  static final String DATA_EXTENSION = "dvd";
+  static final String META_CODEC = "Lucene45ValuesMetadata";
+  static final String META_EXTENSION = "dvm";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+  static final byte NUMERIC = 0;
+  static final byte BINARY = 1;
+  static final byte SORTED = 2;
+  static final byte SORTED_SET = 3;
 }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
index b1ca3a8cf60..b12fa6d75d9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
@@ -25,6 +25,7 @@ import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY
 import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
 import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
 
+import java.io.Closeable; // javadocs
 import java.io.IOException;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -53,7 +54,8 @@ import org.apache.lucene.util.packed.BlockPackedReader;
 import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
 import org.apache.lucene.util.packed.PackedInts;
 
-public class Lucene45DocValuesProducer extends DocValuesProducer {
+/** reader for {@link Lucene45DocValuesFormat} */
+public class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable {
   private final Map<Integer,NumericEntry> numerics;
   private final Map<Integer,BinaryEntry> binaries;
   private final Map<Integer,NumericEntry> ords;
@@ -65,6 +67,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
   private final Map<Integer,MonotonicBlockPackedReader> addressInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
   private final Map<Integer,MonotonicBlockPackedReader> ordIndexInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
   
+  /** expert: instantiates a new reader */
   protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
     // read in the entries from the metadata file.
@@ -317,6 +320,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
     };
   }
   
+  /** returns an address instance for variable-length binary values.
+   *  @lucene.internal */
   protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
     final MonotonicBlockPackedReader addresses;
     synchronized (addressInstances) {
@@ -358,6 +363,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
     };
   }
   
+  /** returns an address instance for prefix-compressed binary values. 
+   * @lucene.internal */
   protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
     final MonotonicBlockPackedReader addresses;
     final long interval = bytes.addressInterval;
@@ -434,6 +441,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
     };
   }
   
+  /** returns an address instance for sortedset ordinal lists
+   * @lucene.internal */
   protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException {
     final MonotonicBlockPackedReader ordIndex;
     synchronized (ordIndexInstances) {
@@ -509,7 +518,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
     };
   }
   
-  public Bits getMissingBits(final long offset) throws IOException {
+  private Bits getMissingBits(final long offset) throws IOException {
     if (offset == -1) {
       return new Bits.MatchAllBits(maxDoc);
     } else {
@@ -557,13 +566,20 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
     data.close();
   }
   
+  /** metadata entry for a numeric docvalues field */
   protected static class NumericEntry {
+    private NumericEntry() {}
+    /** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
     long missingOffset;
+    /** offset to the actual numeric values */
     public long offset;
 
-    public int format;
+    int format;
+    /** packed ints version used to encode these numerics */
     public int packedIntsVersion;
+    /** count of values written */
     public long count;
+    /** packed ints blocksize */
     public int blockSize;
     
     long minValue;
@@ -571,17 +587,26 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
     long table[];
   }
   
+  /** metadata entry for a binary docvalues field */
   protected static class BinaryEntry {
+    private BinaryEntry() {}
+    /** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
     long missingOffset;
+    /** offset to the actual binary values */
     long offset;
 
     int format;
+    /** count of values written */
     public long count;
     int minLength;
     int maxLength;
+    /** offset to the addressing data that maps a value to its slice of the byte[] */
     public long addressesOffset;
+    /** interval of shared prefix chunks (when using prefix-compressed binary) */
     public long addressInterval;
+    /** packed ints version used to encode addressing information */
     public int packedIntsVersion;
+    /** packed ints blocksize */
     public int blockSize;
   }
   
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index 3a3e301a9b2..f9f82317b62 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -96,8 +96,8 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   }
   
   private long docsWithFieldBytesUsed() {
-    // nocommit: this is not correct
-    return docsWithField.getBits().length*RamUsageEstimator.NUM_BYTES_LONG;
+    // size of the long[] + some overhead
+    return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
   }
 
   private void updateBytesUsed() {
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index 7c5aa83fdae..08f065e1df2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -70,8 +70,8 @@ class NumericDocValuesWriter extends DocValuesWriter {
   }
   
   private long docsWithFieldBytesUsed() {
-    // nocommit: this is not correct
-    return docsWithField.getBits().length*RamUsageEstimator.NUM_BYTES_LONG;
+    // size of the long[] + some overhead
+    return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
   }
 
   private void updateBytesUsed() {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java
index 5579af6245f..8b64401b452 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java
@@ -28,7 +28,6 @@ import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 
-// nocommit
 /**
  * Just like {@link Lucene42NormsFormat} but with additional asserts.
  */
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/package.html
new file mode 100644
index 00000000000..ca70ffc3b2e
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/package.html
@@ -0,0 +1,25 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Codecs for testing (simulate old disk formats, wacky theoretical use cases, etc)
+</body>
+</html>
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
index 48e23848aea..c1902cd93eb 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
@@ -2538,7 +2538,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
     d.close();
   }
 
-  // nocommit: get this out of here and into the deprecated codecs (4.0, 4.2)
+  // TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
   public void testHugeBinaryValueLimit() throws Exception {
     // We only test DVFormats that have a limit
     assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
index d79c948ed7e..b1a43a0fa59 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
@@ -757,14 +757,13 @@ public class _TestUtil {
     }
   }
 
-  // nocommit: remove this, push this test to Lucene40/Lucene42 codec tests
+  // TODO: remove this, push this test to Lucene40/Lucene42 codec tests
   public static boolean fieldSupportsHugeBinaryDocValues(String field) {
     String dvFormat = getDocValuesFormat(field);
-    System.out.println(dvFormat);
-    return dvFormat.equals("Lucene45") ||
-      dvFormat.equals("Asserting") || 
-      dvFormat.equals("Disk") ||
-      dvFormat.equals("SimpleText");
+    if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42")) {
+      return false;
+    }
+    return true;
   }
 
   public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {