From 1c982a5f566dbd15cd1ff51652138e6924fa9d9e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 16 Aug 2013 10:24:36 +0000 Subject: [PATCH 01/16] create branch git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1514641 13f79535-47bb-0310-9956-ffa450edef68 From 3be8ed1d108857904a4af0fe75ca86c4de50368a Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 16 Aug 2013 10:25:38 +0000 Subject: [PATCH 02/16] LUCENE-5178: add 'missing' support to docvalues (simpletext only) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1514642 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/diskdv/DiskDocValuesConsumer.java | 52 ++++- .../codecs/diskdv/DiskDocValuesProducer.java | 14 ++ .../simpletext/SimpleTextDocValuesFormat.java | 11 +- .../simpletext/SimpleTextDocValuesReader.java | 99 ++++++++- .../simpletext/SimpleTextDocValuesWriter.java | 32 ++- .../lucene/codecs/DocValuesConsumer.java | 47 ++-- .../lucene/codecs/DocValuesProducer.java | 62 ++++++ .../lucene/codecs/MissingOrdRemapper.java | 124 +++++++++++ .../lucene40/Lucene40DocValuesReader.java | 6 + .../lucene42/Lucene42DocValuesConsumer.java | 53 ++++- .../lucene42/Lucene42DocValuesProducer.java | 9 + .../perfield/PerFieldDocValuesFormat.java | 7 + .../org/apache/lucene/index/AtomicReader.java | 6 + .../lucene/index/BinaryDocValuesWriter.java | 34 ++- .../org/apache/lucene/index/CheckIndex.java | 79 +++++-- .../lucene/index/DocValuesProcessor.java | 2 +- .../lucene/index/FilterAtomicReader.java | 6 + .../apache/lucene/index/MultiDocValues.java | 46 ++++ .../lucene/index/NormsConsumerPerField.java | 2 +- .../lucene/index/NumericDocValuesWriter.java | 32 ++- .../lucene/index/ParallelAtomicReader.java | 7 + .../lucene/index/SegmentCoreReaders.java | 36 ++- .../apache/lucene/index/SegmentMerger.java | 17 +- .../apache/lucene/index/SegmentReader.java | 6 + .../index/SlowCompositeReaderWrapper.java | 6 + .../apache/lucene/index/SortedDocValues.java | 7 +- .../lucene/index/SortedDocValuesWriter.java | 18 +- .../org/apache/lucene/search/FieldCache.java | 25 +-- .../apache/lucene/search/FieldCacheImpl.java | 7 +- .../apache/lucene/search/TestFieldCache.java | 2 +- .../facet42/Facet42DocValuesProducer.java | 8 + .../util/FacetsPayloadMigrationReader.java | 13 ++ .../highlight/WeightedSpanTermExtractor.java | 5 + .../lucene/index/memory/MemoryIndex.java | 5 + .../index/sorter/SortingAtomicReader.java | 46 ++-- .../asserting/AssertingDocValuesFormat.java | 38 ++-- .../CheapBastardDocValuesProducer.java | 13 ++ .../lucene40/Lucene40DocValuesWriter.java | 62 ++++-- .../index/BaseDocValuesFormatTestCase.java | 207 +++++++++++++++++- .../lucene/index/FieldFilterAtomicReader.java | 6 + .../apache/lucene/util/LuceneTestCase.java | 7 + .../org/apache/lucene/util/_TestUtil.java | 10 + .../apache/solr/request/DocValuesFacets.java | 1 + .../org/apache/solr/search/TestDocSet.java | 5 + 44 files changed, 1107 insertions(+), 173 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/MissingOrdRemapper.java diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java index 2d4853a66b0..b5124871237 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java @@ -23,6 +23,7 @@ import java.util.HashSet; import org.apache.lucene.codecs.CodecUtil; import 
org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.MissingOrdRemapper; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; @@ -92,8 +93,9 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { if (optimizeStorage) { uniqueValues = new HashSet<>(); + // nocommit: impl null values (ideally smartly) for (Number nv : values) { - final long v = nv.longValue(); + final long v = nv == null ? 0 : nv.longValue(); if (gcd != 1) { if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { @@ -151,14 +153,15 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { meta.writeLong(gcd); final BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE); for (Number nv : values) { - quotientWriter.add((nv.longValue() - minValue) / gcd); + long value = nv == null ? 0 : nv.longValue(); + quotientWriter.add((value - minValue) / gcd); } quotientWriter.finish(); break; case DELTA_COMPRESSED: final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); for (Number nv : values) { - writer.add(nv.longValue()); + writer.add(nv == null ? 0 : nv.longValue()); } writer.finish(); break; @@ -173,7 +176,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { final int bitsRequired = PackedInts.bitsRequired(uniqueValues.size() - 1); final PackedInts.Writer ordsWriter = PackedInts.getWriterNoHeader(data, PackedInts.Format.PACKED, (int) count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE); for (Number nv : values) { - ordsWriter.add(encode.get(nv.longValue())); + ordsWriter.add(encode.get(nv == null ? 0 : nv.longValue())); } ordsWriter.finish(); break; @@ -192,9 +195,12 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { final long startFP = data.getFilePointer(); long count = 0; for(BytesRef v : values) { - minLength = Math.min(minLength, v.length); - maxLength = Math.max(maxLength, v.length); - data.writeBytes(v.bytes, v.offset, v.length); + final int length = v == null ? 0 : v.length; + minLength = Math.min(minLength, length); + maxLength = Math.max(maxLength, length); + if (v != null) { + data.writeBytes(v.bytes, v.offset, v.length); + } count++; } meta.writeVInt(minLength == maxLength ? BINARY_FIXED_UNCOMPRESSED : BINARY_VARIABLE_UNCOMPRESSED); @@ -213,7 +219,9 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE); long addr = 0; for (BytesRef v : values) { - addr += v.length; + if (v != null) { + addr += v.length; + } writer.add(addr); } writer.finish(); @@ -278,6 +286,34 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { @Override public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + // nocommit: remove this hack and support missing! + + // three cases for simulating the old writer: + // 1. no missing + // 2. missing (and empty string in use): remap ord=-1 -> ord=0 + // 3. 
missing (and empty string not in use): remap all ords +1, insert empty string into values + boolean anyMissing = false; + for (Number n : docToOrd) { + if (n.longValue() == -1) { + anyMissing = true; + break; + } + } + + boolean hasEmptyString = false; + for (BytesRef b : values) { + hasEmptyString = b.length == 0; + break; + } + + if (!anyMissing) { + // nothing to do + } else if (hasEmptyString) { + docToOrd = MissingOrdRemapper.mapMissingToOrd0(docToOrd); + } else { + docToOrd = MissingOrdRemapper.mapAllOrds(docToOrd); + values = MissingOrdRemapper.insertEmptyValue(values); + } meta.writeVInt(field.number); meta.writeByte(DiskDocValuesFormat.SORTED); addTermsDict(field, values); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java index c100b84142c..11a60fdaf6c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java @@ -32,6 +32,7 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.DocValuesProducer.SortedSetDocsWithField; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -59,6 +60,7 @@ class DiskDocValuesProducer extends DocValuesProducer { private final Map ords; private final Map ordIndexes; private final IndexInput data; + private final int maxDoc; // memory-resident structures private final Map addressInstances = new HashMap(); @@ -68,6 +70,7 @@ class DiskDocValuesProducer extends DocValuesProducer { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. IndexInput in = state.directory.openInput(metaName, state.context); + this.maxDoc = state.segmentInfo.getDocCount(); boolean success = false; final int version; try { @@ -490,6 +493,17 @@ class DiskDocValuesProducer extends DocValuesProducer { }; } + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + // nocommit: only use this if the field's entry has missing values (write that), + // otherwise return MatchAllBits + if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + } else { + return new Bits.MatchAllBits(maxDoc); + } + } + @Override public void close() throws IOException { data.close(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java index 02557c95b57..c256367f23f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java @@ -38,12 +38,16 @@ import org.apache.lucene.index.SegmentWriteState; * minvalue 0 * pattern 000 * 005 + * T * 234 + * T * 123 + * T * ... * * so a document's value (delta encoded from minvalue) can be retrieved by - * seeking to startOffset + (1+pattern.length())*docid. The extra 1 is the newline. + * seeking to startOffset + (1+pattern.length()+2)*docid. The extra 1 is the newline. 
+ * The extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
  *
  * for bytes this is also a "fixed-width" file, for example:
  * <pre>
@@ -53,12 +57,15 @@ import org.apache.lucene.index.SegmentWriteState;
  *    pattern 0
  *  length 6
  *  foobar[space][space]
+ *  T
  *  length 3
  *  baz[space][space][space][space][space]
+ *  T
  *  ...
  *  
- * so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*doc
+ * so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength+2)*doc
  * the extra 9 is 2 newlines, plus "length " itself.
+ * the extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
  *
  * for sorted bytes this is a fixed-width file, for example:
  * <pre>
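// Worked example of the seek math above (an illustrative sketch, not part of
// the patch; patternLen/maxLength stand in for the values read from the field header):
final class SimpleTextAddressing {
  // numeric: value line (patternLen chars + '\n') plus the 'T'/'F' line (1 char + '\n')
  static long numericAddr(long startOffset, int patternLen, int docId) {
    return startOffset + (1L + patternLen + 2) * docId;
  }
  // binary: "length " (7 chars) + patternLen digits + '\n', then maxLength padded
  // bytes + '\n', then 'T'/'F' + '\n' -- hence 9 + patternLen + maxLength + 2 per doc
  static long binaryAddr(long startOffset, int patternLen, int maxLength, int docId) {
    return startOffset + (9L + patternLen + maxLength + 2) * docId;
  }
}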
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
index db5ec4e09f3..c625f4ea2e8 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
@@ -28,6 +28,7 @@ import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.DocValuesProducer.SortedSetDocsWithField;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
@@ -38,6 +39,7 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
 
@@ -100,7 +102,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
         assert startsWith(PATTERN);
         field.pattern = stripPrefix(PATTERN);
         field.dataStartFilePointer = data.getFilePointer();
-        data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
+        data.seek(data.getFilePointer() + (1+field.pattern.length()+2) * maxDoc);
       } else if (dvType == DocValuesType.BINARY) {
         readLine();
         assert startsWith(MAXLENGTH);
@@ -109,7 +111,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
         assert startsWith(PATTERN);
         field.pattern = stripPrefix(PATTERN);
         field.dataStartFilePointer = data.getFilePointer();
-        data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * maxDoc);
+        data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength+2) * maxDoc);
       } else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET) {
         readLine();
         assert startsWith(NUMVALUES);
@@ -158,7 +160,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
           if (docID < 0 || docID >= maxDoc) {
             throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
           }
-          in.seek(field.dataStartFilePointer + (1+field.pattern.length())*docID);
+          in.seek(field.dataStartFilePointer + (1+field.pattern.length()+2)*docID);
           SimpleTextUtil.readLine(in, scratch);
           //System.out.println("parsing delta: " + scratch.utf8ToString());
           BigDecimal bd;
@@ -169,6 +171,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
             e.initCause(pe);
             throw e;
           }
+          SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
           return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
         } catch (IOException ioe) {
           throw new RuntimeException(ioe);
@@ -176,6 +179,30 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
       }
     };
   }
+  
+  private Bits getNumericDocsWithField(FieldInfo fieldInfo) throws IOException {
+    final OneField field = fields.get(fieldInfo.name);
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    return new Bits() {
+      @Override
+      public boolean get(int index) {
+        try {
+          in.seek(field.dataStartFilePointer + (1+field.pattern.length()+2)*index);
+          SimpleTextUtil.readLine(in, scratch); // data
+          SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
+          return scratch.bytes[scratch.offset] == (byte) 'T';
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+
+      @Override
+      public int length() {
+        return maxDoc;
+      }
+    };
+  }
 
   @Override
   public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
@@ -196,7 +223,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
           if (docID < 0 || docID >= maxDoc) {
             throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
           }
-          in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength)*docID);
+          in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength+2)*docID);
           SimpleTextUtil.readLine(in, scratch);
           assert StringHelper.startsWith(scratch, LENGTH);
           int len;
@@ -217,6 +244,45 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
       }
     };
   }
+  
+  private Bits getBinaryDocsWithField(FieldInfo fieldInfo) throws IOException {
+    final OneField field = fields.get(fieldInfo.name);
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    return new Bits() {
+      @Override
+      public boolean get(int index) {
+        try {
+          in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength+2)*index);
+          SimpleTextUtil.readLine(in, scratch);
+          assert StringHelper.startsWith(scratch, LENGTH);
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+          // skip past the value's raw bytes (they may themselves contain
+          // newlines, so we cannot simply readLine over them)
+          byte bytes[] = new byte[len];
+          in.readBytes(bytes, 0, len);
+          SimpleTextUtil.readLine(in, scratch); // padding + newline
+          SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
+          return scratch.bytes[scratch.offset] == (byte) 'T';
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public int length() {
+        return maxDoc;
+      }
+    };
+  }
 
   @Override
   public SortedDocValues getSorted(FieldInfo fieldInfo) throws IOException {
@@ -241,7 +307,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
           in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + docID * (1 + field.ordPattern.length()));
           SimpleTextUtil.readLine(in, scratch);
           try {
-            return ordDecoder.parse(scratch.utf8ToString()).intValue();
+            return (int) ordDecoder.parse(scratch.utf8ToString()).longValue()-1;
           } catch (ParseException pe) {
             CorruptIndexException e = new CorruptIndexException("failed to parse ord (resource=" + in + ")");
             e.initCause(pe);
@@ -255,8 +321,12 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
       @Override
       public void lookupOrd(int ord, BytesRef result) {
         try {
-          if (ord < 0 || ord >= field.numValues) {
-            throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues-1) + "; got " + ord);
+          if (ord == -1) {
+            result.length = 0;
+            return;
+          }
+          if (ord < -1 || ord >= field.numValues) {
+            throw new IndexOutOfBoundsException("ord must be -1 .. " + (field.numValues-1) + "; got " + ord);
           }
           in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
           SimpleTextUtil.readLine(in, scratch);
@@ -362,6 +432,21 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
       }
     };
   }
+  
+  @Override
+  public Bits getDocsWithField(FieldInfo field) throws IOException {
+    if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) {
+      return new SortedSetDocsWithField(getSortedSet(field), maxDoc);
+    } else if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED) {
+      return new SortedDocsWithField(getSorted(field), maxDoc);
+    } else if (field.getDocValuesType() == FieldInfo.DocValuesType.BINARY) {
+      return getBinaryDocsWithField(field);
+    } else if (field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC) {
+      return getNumericDocsWithField(field);
+    } else {
+      return new Bits.MatchAllBits(maxDoc);
+    }
+  }
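// Hypothetical usage sketch (names assumed, not code from the patch): pairing
// the returned Bits with the per-type getter lets a caller tell the 0/empty
// filler apart from a truly absent value.
void scan(DocValuesProducer producer, FieldInfo fieldInfo, int maxDoc) throws IOException {
  Bits docsWithField = producer.getDocsWithField(fieldInfo);
  NumericDocValues values = producer.getNumeric(fieldInfo);
  for (int doc = 0; doc < maxDoc; doc++) {
    if (docsWithField.get(doc)) {
      long v = values.get(doc);  // a real value for this document
    } else {
      // missing: values.get(doc) would return the 0 written as filler
    }
  }
}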
 
   @Override
   public void close() throws IOException {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
index 2f86255cbd6..e5f1e35007e 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
@@ -78,7 +78,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
     long minValue = Long.MAX_VALUE;
     long maxValue = Long.MIN_VALUE;
     for(Number n : values) {
-      long v = n.longValue();
+      long v = n == null ? 0 : n.longValue();
       minValue = Math.min(minValue, v);
       maxValue = Math.max(maxValue, v);
     }
@@ -112,13 +112,19 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
 
     // second pass to write the values
     for(Number n : values) {
-      long value = n.longValue();
+      long value = n == null ? 0 : n.longValue();
       assert value >= minValue;
       Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
       String s = encoder.format(delta);
       assert s.length() == patternString.length();
       SimpleTextUtil.write(data, s, scratch);
       SimpleTextUtil.writeNewline(data);
+      if (n == null) {
+        SimpleTextUtil.write(data, "F", scratch);
+      } else {
+        SimpleTextUtil.write(data, "T", scratch);
+      }
+      SimpleTextUtil.writeNewline(data);
       numDocsWritten++;
       assert numDocsWritten <= numDocs;
     }
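// For concreteness (illustrative, not captured patch output): with pattern "000"
// and minValue 0, a doc with value 5 followed by a missing doc is laid out as:
//   005
//   T
//   000
//   F
// i.e. a missing doc still occupies a full record, so the fixed-width seek math holds.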
@@ -132,7 +138,8 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
     assert field.getDocValuesType() == DocValuesType.BINARY;
     int maxLength = 0;
     for(BytesRef value : values) {
-      maxLength = Math.max(maxLength, value.length);
+      final int length = value == null ? 0 : value.length;
+      maxLength = Math.max(maxLength, length);
     }
     writeFieldEntry(field, FieldInfo.DocValuesType.BINARY);
 
@@ -155,19 +162,28 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
     int numDocsWritten = 0;
     for(BytesRef value : values) {
       // write length
+      final int length = value == null ? 0 : value.length;
       SimpleTextUtil.write(data, LENGTH);
-      SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+      SimpleTextUtil.write(data, encoder.format(length), scratch);
       SimpleTextUtil.writeNewline(data);
         
       // write bytes -- don't use SimpleText.write
       // because it escapes:
-      data.writeBytes(value.bytes, value.offset, value.length);
+      if (value != null) {
+        data.writeBytes(value.bytes, value.offset, value.length);
+      }
 
       // pad to fit
-      for (int i = value.length; i < maxLength; i++) {
+      for (int i = length; i < maxLength; i++) {
         data.writeByte((byte)' ');
       }
       SimpleTextUtil.writeNewline(data);
+      if (value == null) {
+        SimpleTextUtil.write(data, "F", scratch);
+      } else {
+        SimpleTextUtil.write(data, "T", scratch);
+      }
+      SimpleTextUtil.writeNewline(data);
       numDocsWritten++;
     }
 
@@ -209,7 +225,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
     SimpleTextUtil.writeNewline(data);
     final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
     
-    int maxOrdBytes = Integer.toString(valueCount).length();
+    int maxOrdBytes = Long.toString(valueCount+1L).length();
     sb.setLength(0);
     for (int i = 0; i < maxOrdBytes; i++) {
       sb.append('0');
@@ -246,7 +262,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
     assert valuesSeen == valueCount;
 
     for(Number ord : docToOrd) {
-      SimpleTextUtil.write(data, ordEncoder.format(ord.intValue()), scratch);
+      SimpleTextUtil.write(data, ordEncoder.format(ord.longValue()+1), scratch); // shift so that missing (-1) encodes as 0
       SimpleTextUtil.writeNewline(data);
     }
   }
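// The +1 above is the writer half of the ord shift; SimpleTextDocValuesReader's
// getSorted() undoes it with -1. Illustrative round trip (variable names assumed):
int ord = sorted.getOrd(doc);        // -1 means this doc had no value
if (ord == -1) {
  // lookupOrd(-1, scratch) yields the empty BytesRef (see the reader above)
} else {
  sorted.lookupOrd(ord, scratch);    // a real term
}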
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 921b94dff69..e0aac6d5cbe 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -69,7 +69,8 @@ public abstract class DocValuesConsumer implements Closeable {
   /**
    * Writes numeric docvalues for a field.
    * @param field field information
-   * @param values Iterable of numeric values (one for each document).
+   * @param values Iterable of numeric values (one for each document). {@code null} indicates
+   *               a missing value.
    * @throws IOException if an I/O error occurred.
    */
   public abstract void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException;
@@ -77,7 +78,8 @@ public abstract class DocValuesConsumer implements Closeable {
   /**
    * Writes binary docvalues for a field.
    * @param field field information
-   * @param values Iterable of binary values (one for each document).
+   * @param values Iterable of binary values (one for each document). {@code null} indicates
+   *               a missing value.
    * @throws IOException if an I/O error occurred.
    */
   public abstract void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException;
@@ -86,7 +88,8 @@ public abstract class DocValuesConsumer implements Closeable {
    * Writes pre-sorted binary docvalues for a field.
    * @param field field information
    * @param values Iterable of binary values in sorted order (deduplicated).
-   * @param docToOrd Iterable of ordinals (one for each document).
+   * @param docToOrd Iterable of ordinals (one for each document). {@code -1} indicates
+   *                 a missing value.
    * @throws IOException if an I/O error occurred.
    */
   public abstract void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException;
@@ -95,7 +98,8 @@ public abstract class DocValuesConsumer implements Closeable {
    * Writes pre-sorted set docvalues for a field
    * @param field field information
    * @param values Iterable of binary values in sorted order (deduplicated).
-   * @param docToOrdCount Iterable of the number of values for each document. 
+   * @param docToOrdCount Iterable of the number of values for each document. A zero ordinal
+   *                      count indicates a missing value.
    * @param ords Iterable of ordinal occurrences (docToOrdCount*maxDoc total).
    * @throws IOException if an I/O error occurred.
    */
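// Taken together, the javadoc notes above fix one missing-value convention per
// docvalues type; a consumer implementation decodes them roughly like this
// (sketch only, helper names are illustrative):
static boolean missingNumeric(Number n)        { return n == null; }
static boolean missingBinary(BytesRef b)       { return b == null; }
static boolean missingSorted(long ord)         { return ord == -1; }
static boolean missingSortedSet(int ordCount)  { return ordCount == 0; }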
@@ -107,7 +111,7 @@ public abstract class DocValuesConsumer implements Closeable {
    * The default implementation calls {@link #addNumericField}, passing
    * an Iterable that merges and filters deleted documents on the fly.
    */
-  public void mergeNumericField(FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge) throws IOException {
+  public void mergeNumericField(final FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge, final List<Bits> docsWithField) throws IOException {
 
     addNumericField(fieldInfo,
                     new Iterable<Number>() {
@@ -116,10 +120,11 @@ public abstract class DocValuesConsumer implements Closeable {
                         return new Iterator<Number>() {
                           int readerUpto = -1;
                           int docIDUpto;
-                          long nextValue;
+                          Long nextValue;
                           AtomicReader currentReader;
                           NumericDocValues currentValues;
                           Bits currentLiveDocs;
+                          Bits currentDocsWithField;
                           boolean nextIsSet;
 
                           @Override
@@ -139,7 +144,6 @@ public abstract class DocValuesConsumer implements Closeable {
                             }
                             assert nextIsSet;
                             nextIsSet = false;
-                            // TODO: make a mutable number
                             return nextValue;
                           }
 
@@ -155,6 +159,7 @@ public abstract class DocValuesConsumer implements Closeable {
                                   currentReader = mergeState.readers.get(readerUpto);
                                   currentValues = toMerge.get(readerUpto);
                                   currentLiveDocs = currentReader.getLiveDocs();
+                                  currentDocsWithField = docsWithField.get(readerUpto);
                                 }
                                 docIDUpto = 0;
                                 continue;
@@ -162,7 +167,11 @@ public abstract class DocValuesConsumer implements Closeable {
 
                               if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                                 nextIsSet = true;
-                                nextValue = currentValues.get(docIDUpto);
+                                if (currentDocsWithField.get(docIDUpto)) {
+                                  nextValue = currentValues.get(docIDUpto);
+                                } else {
+                                  nextValue = null;
+                                }
                                 docIDUpto++;
                                 return true;
                               }
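// Note the switch from long to Long above: the boxed type is what lets a real
// null flow through to addNumericField for missing docs, e.g. (illustrative):
//   Long nextValue = currentDocsWithField.get(docIDUpto)
//       ? Long.valueOf(currentValues.get(docIDUpto))
//       : null;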
@@ -181,7 +190,7 @@ public abstract class DocValuesConsumer implements Closeable {
    * The default implementation calls {@link #addBinaryField}, passing
    * an Iterable that merges and filters deleted documents on the fly.
    */
-  public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List<BinaryDocValues> toMerge) throws IOException {
+  public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List<BinaryDocValues> toMerge, final List<Bits> docsWithField) throws IOException {
 
     addBinaryField(fieldInfo,
                    new Iterable<BytesRef>() {
@@ -191,9 +200,11 @@ public abstract class DocValuesConsumer implements Closeable {
                          int readerUpto = -1;
                          int docIDUpto;
                          BytesRef nextValue = new BytesRef();
+                         BytesRef nextPointer; // null if missing, otherwise points to nextValue
                          AtomicReader currentReader;
                          BinaryDocValues currentValues;
                          Bits currentLiveDocs;
+                         Bits currentDocsWithField;
                          boolean nextIsSet;
 
                          @Override
@@ -213,8 +224,7 @@ public abstract class DocValuesConsumer implements Closeable {
                            }
                            assert nextIsSet;
                            nextIsSet = false;
-                           // TODO: make a mutable number
-                           return nextValue;
+                           return nextPointer;
                          }
 
                          private boolean setNext() {
@@ -228,6 +238,7 @@ public abstract class DocValuesConsumer implements Closeable {
                                if (readerUpto < toMerge.size()) {
                                  currentReader = mergeState.readers.get(readerUpto);
                                  currentValues = toMerge.get(readerUpto);
+                                 currentDocsWithField = docsWithField.get(readerUpto);
                                  currentLiveDocs = currentReader.getLiveDocs();
                                }
                                docIDUpto = 0;
@@ -236,7 +247,12 @@ public abstract class DocValuesConsumer implements Closeable {
 
                              if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                                nextIsSet = true;
-                               currentValues.get(docIDUpto, nextValue);
+                               if (currentDocsWithField.get(docIDUpto)) {
+                                 currentValues.get(docIDUpto, nextValue);
+                                 nextPointer = nextValue;
+                               } else {
+                                 nextPointer = null;
+                               }
                                docIDUpto++;
                                return true;
                              }
@@ -272,7 +288,10 @@ public abstract class DocValuesConsumer implements Closeable {
         OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
         for (int i = 0; i < reader.maxDoc(); i++) {
           if (liveDocs.get(i)) {
-            bitset.set(dv.getOrd(i));
+            int ord = dv.getOrd(i);
+            if (ord >= 0) {
+              bitset.set(ord);
+            }
           }
         }
         liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
@@ -368,7 +387,7 @@ public abstract class DocValuesConsumer implements Closeable {
                   if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                     nextIsSet = true;
                     int segOrd = dvs[readerUpto].getOrd(docIDUpto);
-                    nextValue = (int) map.getGlobalOrd(readerUpto, segOrd);
+                    nextValue = segOrd == -1 ? -1 : (int) map.getGlobalOrd(readerUpto, segOrd);
                     docIDUpto++;
                     return true;
                   }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
index b2c5d549d27..04778aa1201 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@@ -25,6 +25,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.Bits;
 
 /** Abstract API that produces numeric, binary and
  * sorted docvalues.
@@ -56,4 +57,65 @@ public abstract class DocValuesProducer implements Closeable {
    *  The returned instance need not be thread-safe: it will only be
    *  used by a single thread. */
   public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;
+  
+  /** Returns a {@link Bits} of size reader.maxDoc(), with a bit set for each
+   *  docid that has a value for this field.
+   *  The returned instance need not be thread-safe: it will only be
+   *  used by a single thread. */
+  public abstract Bits getDocsWithField(FieldInfo field) throws IOException;
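// A format that can never record a missing value can satisfy this contract
// trivially, as the Lucene40 reader later in this patch does (maxDoc assumed
// to be in scope):
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
  return new Bits.MatchAllBits(maxDoc);
}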
+  
+  /** 
+   * A simple implementation of {@link DocValuesProducer#getDocsWithField} that 
+   * returns {@code true} if a document has an ordinal >= 0
+   * 

+   * Codecs can choose to use this (or implement it more efficiently another way), but
+   * in most cases a Bits is unnecessary anyway: users can check this as they go.
+   */
+  public static class SortedDocsWithField implements Bits {
+    final SortedDocValues in;
+    final int maxDoc;
+    
+    public SortedDocsWithField(SortedDocValues in, int maxDoc) {
+      this.in = in;
+      this.maxDoc = maxDoc;
+    }
+    
+    @Override
+    public boolean get(int index) {
+      return in.getOrd(index) >= 0;
+    }
+    
+    @Override
+    public int length() {
+      return maxDoc;
+    }
+  }
+  
+  /** 
+   * A simple implementation of {@link DocValuesProducer#getDocsWithField} that 
+   * returns {@code true} if a document has any ordinals.
+   * <p>
+   * Codecs can choose to use this (or implement it more efficiently another way), but
+   * in most cases a Bits is unnecessary anyway: users can check this as they go.
+   */
+  public static class SortedSetDocsWithField implements Bits {
+    final SortedSetDocValues in;
+    final int maxDoc;
+    
+    public SortedSetDocsWithField(SortedSetDocValues in, int maxDoc) {
+      this.in = in;
+      this.maxDoc = maxDoc;
+    }
+    
+    @Override
+    public boolean get(int index) {
+      in.setDocument(index);
+      return in.nextOrd() != SortedSetDocValues.NO_MORE_ORDS;
+    }
+    
+    @Override
+    public int length() {
+      return maxDoc;
+    }
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MissingOrdRemapper.java b/lucene/core/src/java/org/apache/lucene/codecs/MissingOrdRemapper.java
new file mode 100644
index 00000000000..61e17598bcb
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MissingOrdRemapper.java
@@ -0,0 +1,124 @@
+package org.apache.lucene.codecs;
+
+import java.util.Iterator;
+
+import org.apache.lucene.util.BytesRef;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * a utility class to write missing values for SORTED_SET as if they were the empty string
+ * (to simulate pre-Lucene4.5 dv behavior for testing old codecs)
+ */
+// nocommit: move this to test-framework with all the impersonators of
+// these old codecs once new memory/disk codecs are written that support missing
+public class MissingOrdRemapper {
+  
+  /** insert an empty byte[] to the front of this iterable */
+  public static Iterable<BytesRef> insertEmptyValue(final Iterable<BytesRef> iterable) {
+    return new Iterable<BytesRef>() {
+      @Override
+      public Iterator<BytesRef> iterator() {
+        return new Iterator<BytesRef>() {
+          boolean seenEmpty = false;
+          Iterator<BytesRef> in = iterable.iterator();
+          
+          @Override
+          public boolean hasNext() {
+            return !seenEmpty || in.hasNext();
+          }
+
+          @Override
+          public BytesRef next() {
+            if (!seenEmpty) {
+              seenEmpty = true;
+              return new BytesRef();
+            } else {
+              return in.next();
+            }
+          }
+
+          @Override
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    };
+  }
+  
+  /** remaps ord -1 to ord 0 on this iterable.
*/ + public static Iterable mapMissingToOrd0(final Iterable iterable) { + return new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + Iterator in = iterable.iterator(); + + @Override + public boolean hasNext() { + return in.hasNext(); + } + + @Override + public Number next() { + Number n = in.next(); + if (n.longValue() == -1) { + return 0; + } else { + return n; + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + /** remaps every ord+1 on this iterable */ + public static Iterable mapAllOrds(final Iterable iterable) { + return new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + Iterator in = iterable.iterator(); + + @Override + public boolean hasNext() { + return in.hasNext(); + } + + @Override + public Number next() { + Number n = in.next(); + return n.longValue()+1; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java index 21a082c1893..54617702c07 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; @@ -620,6 +621,11 @@ final class Lucene40DocValuesReader extends DocValuesProducer { throw new IllegalStateException("Lucene 4.0 does not support SortedSet: how did you pull this off?"); } + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + return new Bits.MatchAllBits(state.segmentInfo.getDocCount()); + } + @Override public void close() throws IOException { dir.close(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java index a1f6dc47d82..edd3bbd240a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java @@ -25,6 +25,7 @@ import java.util.NoSuchElementException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.MissingOrdRemapper; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; @@ -106,7 +107,8 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { long count = 0; for (Number nv : values) { - final long v = nv.longValue(); + // TODO: support this as MemoryDVFormat (and be smart about missing maybe) + final long v = nv == null ? 
0 : nv.longValue(); if (gcd != 1) { if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { @@ -142,7 +144,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { if (formatAndBits.bitsPerValue == 8 && minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) { meta.writeByte(UNCOMPRESSED); // uncompressed for (Number nv : values) { - data.writeByte((byte) nv.longValue()); + data.writeByte(nv == null ? 0 : (byte) nv.longValue()); } } else { meta.writeByte(TABLE_COMPRESSED); // table-compressed @@ -160,7 +162,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE); for(Number nv : values) { - writer.add(encode.get(nv.longValue())); + writer.add(encode.get(nv == null ? 0 : nv.longValue())); } writer.finish(); } @@ -173,7 +175,8 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); for (Number nv : values) { - writer.add((nv.longValue() - minValue) / gcd); + long value = nv == null ? 0 : nv.longValue(); + writer.add((value - minValue) / gcd); } writer.finish(); } else { @@ -184,7 +187,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); for (Number nv : values) { - writer.add(nv.longValue()); + writer.add(nv == null ? 0 : nv.longValue()); } writer.finish(); } @@ -216,9 +219,12 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { int maxLength = Integer.MIN_VALUE; final long startFP = data.getFilePointer(); for(BytesRef v : values) { - minLength = Math.min(minLength, v.length); - maxLength = Math.max(maxLength, v.length); - data.writeBytes(v.bytes, v.offset, v.length); + final int length = v == null ? 0 : v.length; + minLength = Math.min(minLength, length); + maxLength = Math.max(maxLength, length); + if (v != null) { + data.writeBytes(v.bytes, v.offset, v.length); + } } meta.writeLong(startFP); meta.writeLong(data.getFilePointer() - startFP); @@ -234,7 +240,9 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE); long addr = 0; for (BytesRef v : values) { - addr += v.length; + if (v != null) { + addr += v.length; + } writer.add(addr); } writer.finish(); @@ -262,6 +270,33 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { @Override public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + // three cases for simulating the old writer: + // 1. no missing + // 2. missing (and empty string in use): remap ord=-1 -> ord=0 + // 3. 
missing (and empty string not in use): remap all ords +1, insert empty string into values + boolean anyMissing = false; + for (Number n : docToOrd) { + if (n.longValue() == -1) { + anyMissing = true; + break; + } + } + + boolean hasEmptyString = false; + for (BytesRef b : values) { + hasEmptyString = b.length == 0; + break; + } + + if (!anyMissing) { + // nothing to do + } else if (hasEmptyString) { + docToOrd = MissingOrdRemapper.mapMissingToOrd0(docToOrd); + } else { + docToOrd = MissingOrdRemapper.mapAllOrds(docToOrd); + values = MissingOrdRemapper.insertEmptyValue(values); + } + // write the ordinals as numerics addNumericField(field, docToOrd, false); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java index 16ecf187b93..c2a95fb8195 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java @@ -429,6 +429,15 @@ class Lucene42DocValuesProducer extends DocValuesProducer { } }; } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + } else { + return new Bits.MatchAllBits(maxDoc); + } + } @Override public void close() throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index 72053a8b3a1..3ed6797e783 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; @@ -265,6 +266,12 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { DocValuesProducer producer = fields.get(field.name); return producer == null ? null : producer.getSortedSet(field); } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + DocValuesProducer producer = fields.get(field.name); + return producer == null ? null : producer.getDocsWithField(field); + } @Override public void close() throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java index 1b0e4168969..a3e28e85c11 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java @@ -207,6 +207,12 @@ public abstract class AtomicReader extends IndexReader { * this field. The returned instance should only be * used by a single thread. */ public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException; + + /** Returns a {@link Bits} at the size of reader.maxDoc(), + * with turned on bits for each docid that does have a value for this field, + * or null if no DocValues were indexed for this field. 
The + * returned instance should only be used by a single thread */ + public abstract Bits getDocsWithField(String field) throws IOException; /** Returns {@link NumericDocValues} representing norms * for this field, or null if no {@link NumericDocValues} diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java index 643408ff9b0..553f9ff1ff7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java @@ -26,6 +26,8 @@ import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer; import org.apache.lucene.util.packed.PackedInts; @@ -38,6 +40,9 @@ class BinaryDocValuesWriter extends DocValuesWriter { private final ByteBlockPool pool; private final AppendingDeltaPackedLongBuffer lengths; + private final OpenBitSet docsWithField; + private final Counter iwBytesUsed; + private long bytesUsed; private final FieldInfo fieldInfo; private int addedValues = 0; @@ -45,6 +50,10 @@ class BinaryDocValuesWriter extends DocValuesWriter { this.fieldInfo = fieldInfo; this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed)); this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); + this.iwBytesUsed = iwBytesUsed; + this.docsWithField = new OpenBitSet(); + this.bytesUsed = docsWithFieldBytesUsed(); + iwBytesUsed.addAndGet(bytesUsed); } public void addValue(int docID, BytesRef value) { @@ -66,6 +75,19 @@ class BinaryDocValuesWriter extends DocValuesWriter { addedValues++; lengths.add(value.length); pool.append(value); + docsWithField.set(docID); + updateBytesUsed(); + } + + private long docsWithFieldBytesUsed() { + // nocommit: this is not correct + return docsWithField.getBits().length*RamUsageEstimator.NUM_BYTES_LONG; + } + + private void updateBytesUsed() { + final long newBytesUsed = docsWithFieldBytesUsed(); + iwBytesUsed.addAndGet(newBytesUsed - bytesUsed); + bytesUsed = newBytesUsed; } @Override @@ -111,19 +133,23 @@ class BinaryDocValuesWriter extends DocValuesWriter { if (!hasNext()) { throw new NoSuchElementException(); } + final BytesRef v; if (upto < size) { int length = (int) lengthsIterator.next(); value.grow(length); value.length = length; pool.readBytes(byteOffset, value.bytes, value.offset, value.length); byteOffset += length; + if (docsWithField.get(upto)) { + v = value; + } else { + v = null; + } } else { - // This is to handle last N documents not having - // this DV field in the end of the segment: - value.length = 0; + v = null; } upto++; - return value; + return v; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 27a1aface60..5a702d29e39 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1280,7 +1280,8 @@ public class CheckIndex { if (reader.getBinaryDocValues(fieldInfo.name) != null || reader.getNumericDocValues(fieldInfo.name) != null || reader.getSortedDocValues(fieldInfo.name) != null || - reader.getSortedSetDocValues(fieldInfo.name) != null) { + 
reader.getSortedSetDocValues(fieldInfo.name) != null || + reader.getDocsWithField(fieldInfo.name) != null) { throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!"); } } @@ -1301,26 +1302,37 @@ public class CheckIndex { return status; } - private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv) { + private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv, Bits docsWithField) { BytesRef scratch = new BytesRef(); for (int i = 0; i < reader.maxDoc(); i++) { dv.get(i, scratch); assert scratch.isValid(); + if (docsWithField.get(i) == false && scratch.length > 0) { + throw new RuntimeException("dv for field: " + fieldName + " is missing but has value=" + scratch + " for doc: " + i); + } } } - private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv) { - checkBinaryDocValues(fieldName, reader, dv); + private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField) { + checkBinaryDocValues(fieldName, reader, dv, docsWithField); final int maxOrd = dv.getValueCount()-1; FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount()); int maxOrd2 = -1; for (int i = 0; i < reader.maxDoc(); i++) { int ord = dv.getOrd(i); - if (ord < 0 || ord > maxOrd) { + if (ord == -1) { + if (docsWithField.get(i)) { + throw new RuntimeException("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + i); + } + } else if (ord < -1 || ord > maxOrd) { throw new RuntimeException("ord out of bounds: " + ord); + } else { + if (!docsWithField.get(i)) { + throw new RuntimeException("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + i); + } + maxOrd2 = Math.max(maxOrd2, ord); + seenOrds.set(ord); } - maxOrd2 = Math.max(maxOrd2, ord); - seenOrds.set(ord); } if (maxOrd != maxOrd2) { throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2); @@ -1342,7 +1354,7 @@ public class CheckIndex { } } - private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv) { + private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) { final long maxOrd = dv.getValueCount()-1; OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount()); long maxOrd2 = -1; @@ -1350,16 +1362,28 @@ public class CheckIndex { dv.setDocument(i); long lastOrd = -1; long ord; - while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - if (ord <= lastOrd) { - throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i); + if (docsWithField.get(i)) { + int ordCount = 0; + while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + ordCount++; + if (ord <= lastOrd) { + throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i); + } + if (ord < 0 || ord > maxOrd) { + throw new RuntimeException("ord out of bounds: " + ord); + } + lastOrd = ord; + maxOrd2 = Math.max(maxOrd2, ord); + seenOrds.set(ord); } - if (ord < 0 || ord > maxOrd) { - throw new RuntimeException("ord out of bounds: " + ord); + if (ordCount == 0) { + throw new RuntimeException("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i); + } + } else { + long o = dv.nextOrd(); + if (o != SortedSetDocValues.NO_MORE_ORDS) { + throw new 
RuntimeException("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i); } - lastOrd = ord; - maxOrd2 = Math.max(maxOrd2, ord); - seenOrds.set(ord); } } if (maxOrd != maxOrd2) { @@ -1383,17 +1407,26 @@ public class CheckIndex { } } - private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv) { + private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField) { for (int i = 0; i < reader.maxDoc(); i++) { - ndv.get(i); + long value = ndv.get(i); + if (docsWithField.get(i) == false && value > 0) { + throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i); + } } } private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception { + Bits docsWithField = reader.getDocsWithField(fi.name); + if (docsWithField == null) { + throw new RuntimeException(fi.name + " docsWithField does not exist"); + } else if (docsWithField.length() != reader.maxDoc()) { + throw new RuntimeException(fi.name + " docsWithField has incorrect length: " + docsWithField.length() + ",expected: " + reader.maxDoc()); + } switch(fi.getDocValuesType()) { case SORTED: status.totalSortedFields++; - checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name)); + checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name), docsWithField); if (reader.getBinaryDocValues(fi.name) != null || reader.getNumericDocValues(fi.name) != null || reader.getSortedSetDocValues(fi.name) != null) { @@ -1402,7 +1435,7 @@ public class CheckIndex { break; case SORTED_SET: status.totalSortedSetFields++; - checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name)); + checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name), docsWithField); if (reader.getBinaryDocValues(fi.name) != null || reader.getNumericDocValues(fi.name) != null || reader.getSortedDocValues(fi.name) != null) { @@ -1411,7 +1444,7 @@ public class CheckIndex { break; case BINARY: status.totalBinaryFields++; - checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name)); + checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name), docsWithField); if (reader.getNumericDocValues(fi.name) != null || reader.getSortedDocValues(fi.name) != null || reader.getSortedSetDocValues(fi.name) != null) { @@ -1420,7 +1453,7 @@ public class CheckIndex { break; case NUMERIC: status.totalNumericFields++; - checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name)); + checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name), docsWithField); if (reader.getBinaryDocValues(fi.name) != null || reader.getSortedDocValues(fi.name) != null || reader.getSortedSetDocValues(fi.name) != null) { @@ -1435,7 +1468,7 @@ public class CheckIndex { private static void checkNorms(FieldInfo fi, AtomicReader reader, PrintStream infoStream) throws IOException { switch(fi.getNormType()) { case NUMERIC: - checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name)); + checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc())); break; default: throw new AssertionError("wtf: " + fi.getNormType()); diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java index 90f2e4514f6..cb1b30154dd 
100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java @@ -143,7 +143,7 @@ final class DocValuesProcessor extends StoredFieldsConsumer { DocValuesWriter writer = writers.get(fieldInfo.name); NumericDocValuesWriter numericWriter; if (writer == null) { - numericWriter = new NumericDocValuesWriter(fieldInfo, bytesUsed); + numericWriter = new NumericDocValuesWriter(fieldInfo, bytesUsed, true); writers.put(fieldInfo.name, numericWriter); } else if (!(writer instanceof NumericDocValuesWriter)) { throw new IllegalArgumentException("Incompatible DocValues type: field \"" + fieldInfo.name + "\" changed from " + getTypeDesc(writer) + " to numeric"); diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 93be7a66105..4a8a55a3433 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -414,4 +414,10 @@ public class FilterAtomicReader extends AtomicReader { return in.getNormValues(field); } + @Override + public Bits getDocsWithField(String field) throws IOException { + ensureOpen(); + return in.getDocsWithField(field); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index 8f262a8bb5e..1e6671ea05c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex; import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.packed.AppendingPackedLongBuffer; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; @@ -135,6 +136,51 @@ public class MultiDocValues { }; } } + + /** Returns a Bits for a reader's docsWithField (potentially merging on-the-fly) + *
+ * <p>
+ * This is a slow way to access this bitset. Instead, access them per-segment + * with {@link AtomicReader#getDocsWithField(String)} + *
+ * </p>
+ * */ + public static Bits getDocsWithField(final IndexReader r, final String field) throws IOException { + final List leaves = r.leaves(); + final int size = leaves.size(); + if (size == 0) { + return null; + } else if (size == 1) { + return leaves.get(0).reader().getDocsWithField(field); + } + + boolean anyReal = false; + boolean anyMissing = false; + final Bits[] values = new Bits[size]; + final int[] starts = new int[size+1]; + for (int i = 0; i < size; i++) { + AtomicReaderContext context = leaves.get(i); + Bits v = context.reader().getDocsWithField(field); + if (v == null) { + v = new Bits.MatchNoBits(context.reader().maxDoc()); + anyMissing = true; + } else { + anyReal = true; + if (v instanceof Bits.MatchAllBits == false) { + anyMissing = true; + } + } + values[i] = v; + starts[i] = context.docBase; + } + starts[size] = r.maxDoc(); + + if (!anyReal) { + return null; + } else if (!anyMissing) { + return new Bits.MatchAllBits(r.maxDoc()); + } else { + return new MultiBits(values, starts, false); + } + } /** Returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly) *

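The helper added above returns null when no segment has docvalues for the field, MatchAllBits when no document is missing, and otherwise a MultiBits stitched together from the per-segment readers. A minimal caller-side sketch of the slow composite path (the helper class and the field name "price" are illustrative, not part of this patch):

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.MultiDocValues;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Bits;

    class DocsWithFieldExample {
      /** Counts documents that actually carry a value for "price". */
      static int countDocsWithPrice(Directory dir) throws Exception {
        DirectoryReader reader = DirectoryReader.open(dir);
        try {
          Bits docsWithField = MultiDocValues.getDocsWithField(reader, "price");
          if (docsWithField == null) {
            return 0; // no segment has docvalues for this field
          }
          int count = 0;
          for (int doc = 0; doc < reader.maxDoc(); doc++) {
            if (docsWithField.get(doc)) {
              count++; // a real value, not a filled-in default
            }
          }
          return count;
        } finally {
          reader.close();
        }
      }
    }

As the javadoc notes, per-segment access through AtomicReader#getDocsWithField(String) avoids the on-the-fly merge cost and is preferred in hot loops.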
diff --git a/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java b/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java index 4a3219eaa23..724c9ed05e0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java @@ -44,7 +44,7 @@ final class NormsConsumerPerField extends InvertedDocEndConsumerPerField impleme if (fieldInfo.isIndexed() && !fieldInfo.omitsNorms()) { if (consumer == null) { fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC); - consumer = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed); + consumer = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed, false); } consumer.addValue(docState.docID, similarity.computeNorm(fieldState)); } diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java index cc070830927..7c5aa83fdae 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java @@ -23,6 +23,8 @@ import java.util.NoSuchElementException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.util.Counter; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer; import org.apache.lucene.util.packed.PackedInts; @@ -35,14 +37,18 @@ class NumericDocValuesWriter extends DocValuesWriter { private AppendingDeltaPackedLongBuffer pending; private final Counter iwBytesUsed; private long bytesUsed; + private final OpenBitSet docsWithField; private final FieldInfo fieldInfo; + private final boolean trackDocsWithField; - public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { + public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) { pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); - bytesUsed = pending.ramBytesUsed(); + docsWithField = new OpenBitSet(); + bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed(); this.fieldInfo = fieldInfo; this.iwBytesUsed = iwBytesUsed; iwBytesUsed.addAndGet(bytesUsed); + this.trackDocsWithField = trackDocsWithField; } public void addValue(int docID, long value) { @@ -56,12 +62,20 @@ class NumericDocValuesWriter extends DocValuesWriter { } pending.add(value); + if (trackDocsWithField) { + docsWithField.set(docID); + } updateBytesUsed(); } + + private long docsWithFieldBytesUsed() { + // nocommit: this is not correct + return docsWithField.getBits().length*RamUsageEstimator.NUM_BYTES_LONG; + } private void updateBytesUsed() { - final long newBytesUsed = pending.ramBytesUsed(); + final long newBytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed(); iwBytesUsed.addAndGet(newBytesUsed - bytesUsed); bytesUsed = newBytesUsed; } @@ -109,14 +123,18 @@ class NumericDocValuesWriter extends DocValuesWriter { if (!hasNext()) { throw new NoSuchElementException(); } - long value; + Long value; if (upto < size) { - value = iter.next(); + long v = iter.next(); + if (!trackDocsWithField || docsWithField.get(upto)) { + value = v; + } else { + value = null; + } } else { - value = 0; + value = trackDocsWithField ? 
null : MISSING; } upto++; - // TODO: make reusable Number return value; } diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java index c7174d8e7b5..cbc4bbd7f3e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java @@ -285,6 +285,13 @@ public class ParallelAtomicReader extends AtomicReader { return reader == null ? null : reader.getSortedSetDocValues(field); } + @Override + public Bits getDocsWithField(String field) throws IOException { + ensureOpen(); + AtomicReader reader = fieldToReader.get(field); + return reader == null ? null : reader.getDocsWithField(field); + } + @Override public NumericDocValues getNormValues(String field) throws IOException { ensureOpen(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index ab0348293a9..3a526aad54f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.SegmentReader.CoreClosedListener; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.IOUtils; @@ -87,6 +88,13 @@ final class SegmentCoreReaders { return new HashMap(); } }; + + final CloseableThreadLocal> docsWithFieldLocal = new CloseableThreadLocal>() { + @Override + protected Map initialValue() { + return new HashMap(); + } + }; final CloseableThreadLocal> normsLocal = new CloseableThreadLocal>() { @Override @@ -274,6 +282,30 @@ final class SegmentCoreReaders { return dvs; } + + Bits getDocsWithField(String field) throws IOException { + FieldInfo fi = fieldInfos.fieldInfo(field); + if (fi == null) { + // Field does not exist + return null; + } + if (fi.getDocValuesType() == null) { + // Field was not indexed with doc values + return null; + } + + assert dvProducer != null; + + Map dvFields = docsWithFieldLocal.get(); + + Bits dvs = dvFields.get(field); + if (dvs == null) { + dvs = dvProducer.getDocsWithField(fi); + dvFields.put(field, dvs); + } + + return dvs; + } NumericDocValues getNormValues(String field) throws IOException { FieldInfo fi = fieldInfos.fieldInfo(field); @@ -300,8 +332,8 @@ final class SegmentCoreReaders { void decRef() throws IOException { if (ref.decrementAndGet() == 0) { - IOUtils.close(termVectorsLocal, fieldsReaderLocal, docValuesLocal, normsLocal, fields, dvProducer, - termVectorsReaderOrig, fieldsReaderOrig, cfsReader, normsProducer); + IOUtils.close(termVectorsLocal, fieldsReaderLocal, docValuesLocal, normsLocal, docsWithFieldLocal, fields, + dvProducer, termVectorsReaderOrig, fieldsReaderOrig, cfsReader, normsProducer); notifyCoreClosedListeners(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java index f121e85b10f..718687bcc85 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java @@ -30,6 +30,7 @@ import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.index.FieldInfo.DocValuesType; import 
org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; @@ -156,24 +157,32 @@ final class SegmentMerger { if (type != null) { if (type == DocValuesType.NUMERIC) { List toMerge = new ArrayList(); + List docsWithField = new ArrayList(); for (AtomicReader reader : mergeState.readers) { NumericDocValues values = reader.getNumericDocValues(field.name); + Bits bits = reader.getDocsWithField(field.name); if (values == null) { values = NumericDocValues.EMPTY; + bits = new Bits.MatchNoBits(reader.maxDoc()); } toMerge.add(values); + docsWithField.add(bits); } - consumer.mergeNumericField(field, mergeState, toMerge); + consumer.mergeNumericField(field, mergeState, toMerge, docsWithField); } else if (type == DocValuesType.BINARY) { List toMerge = new ArrayList(); + List docsWithField = new ArrayList(); for (AtomicReader reader : mergeState.readers) { BinaryDocValues values = reader.getBinaryDocValues(field.name); + Bits bits = reader.getDocsWithField(field.name); if (values == null) { values = BinaryDocValues.EMPTY; + bits = new Bits.MatchNoBits(reader.maxDoc()); } toMerge.add(values); + docsWithField.add(bits); } - consumer.mergeBinaryField(field, mergeState, toMerge); + consumer.mergeBinaryField(field, mergeState, toMerge, docsWithField); } else if (type == DocValuesType.SORTED) { List toMerge = new ArrayList(); for (AtomicReader reader : mergeState.readers) { @@ -216,14 +225,16 @@ final class SegmentMerger { for (FieldInfo field : mergeState.fieldInfos) { if (field.hasNorms()) { List toMerge = new ArrayList(); + List docsWithField = new ArrayList(); for (AtomicReader reader : mergeState.readers) { NumericDocValues norms = reader.getNormValues(field.name); if (norms == null) { norms = NumericDocValues.EMPTY; } toMerge.add(norms); + docsWithField.add(new Bits.MatchAllBits(reader.maxDoc())); } - consumer.mergeNumericField(field, mergeState, toMerge); + consumer.mergeNumericField(field, mergeState, toMerge, docsWithField); } } success = true; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index 8214a980cd9..c6cf702955b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -223,6 +223,12 @@ public final class SegmentReader extends AtomicReader { return core.getNumericDocValues(field); } + @Override + public Bits getDocsWithField(String field) throws IOException { + ensureOpen(); + return core.getDocsWithField(field); + } + @Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { ensureOpen(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java index bce0ef64257..b7af7d0ba88 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java @@ -91,6 +91,12 @@ public final class SlowCompositeReaderWrapper extends AtomicReader { return MultiDocValues.getNumericValues(in, field); } + @Override + public Bits getDocsWithField(String field) throws IOException { + ensureOpen(); + return MultiDocValues.getDocsWithField(in, field); + } + @Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { ensureOpen(); 
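In the SegmentMerger changes above, each source reader now contributes a Bits in parallel with its values: toMerge.get(i) pairs with docsWithField.get(i), readers that lack the field contribute MatchNoBits, and norms always get MatchAllBits since every indexed document has a norm. This is what lets a codec distinguish a stored 0 (or an empty BytesRef) from a genuinely absent value. A sketch of the per-source iteration a merge routine performs under that invariant (the class and method are illustrative, not the actual DocValuesConsumer code):

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.NumericDocValues;
    import org.apache.lucene.util.Bits;

    class MergeSketch {
      /** Sums the values that are really present in one merge source. */
      static long sumPresentValues(AtomicReader reader, NumericDocValues values,
                                   Bits docsWithField) {
        long sum = 0;
        for (int doc = 0; doc < reader.maxDoc(); doc++) {
          if (docsWithField.get(doc)) {
            sum += values.get(doc); // a real value, possibly 0
          }
          // else: the doc has no value; a merge records it as missing,
          // e.g. NumericDocValuesWriter's iterator yields null for such docs
        }
        return sum;
      }
    }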
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java index c7dae5b3dd9..1968a791157 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java @@ -37,7 +37,8 @@ public abstract class SortedDocValues extends BinaryDocValues { * Returns the ordinal for the specified docID. * @param docID document ID to lookup * @return ordinal for the document: this is dense, starts at 0, then - * increments by 1 for the next value in sorted order. + * increments by 1 for the next value in sorted order. Note that + * missing values are indicated by -1. */ public abstract int getOrd(int docID); @@ -71,7 +72,7 @@ public abstract class SortedDocValues extends BinaryDocValues { public static final SortedDocValues EMPTY = new SortedDocValues() { @Override public int getOrd(int docID) { - return 0; + return -1; } @Override @@ -83,7 +84,7 @@ public abstract class SortedDocValues extends BinaryDocValues { @Override public int getValueCount() { - return 1; + return 0; } }; diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java index d337a0ca1c5..4d42a2e15c9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java @@ -30,19 +30,19 @@ import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.Counter; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.packed.AppendingPackedLongBuffer; +import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer; import org.apache.lucene.util.packed.PackedInts; /** Buffers up pending byte[] per doc, deref and sorting via * int ord, then flushes when segment flushes. 
*/ class SortedDocValuesWriter extends DocValuesWriter { final BytesRefHash hash; - private AppendingPackedLongBuffer pending; + private AppendingDeltaPackedLongBuffer pending; private final Counter iwBytesUsed; private long bytesUsed; // this currently only tracks differences in 'pending' private final FieldInfo fieldInfo; - private static final BytesRef EMPTY = new BytesRef(BytesRef.EMPTY_BYTES); + private static final int EMPTY_ORD = -1; public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { this.fieldInfo = fieldInfo; @@ -52,7 +52,7 @@ class SortedDocValuesWriter extends DocValuesWriter { new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)), BytesRefHash.DEFAULT_CAPACITY, new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed)); - pending = new AppendingPackedLongBuffer(PackedInts.COMPACT); + pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); bytesUsed = pending.ramBytesUsed(); iwBytesUsed.addAndGet(bytesUsed); } @@ -70,7 +70,7 @@ class SortedDocValuesWriter extends DocValuesWriter { // Fill in any holes: while(pending.size() < docID) { - addOneValue(EMPTY); + pending.add(EMPTY_ORD); } addOneValue(value); @@ -79,8 +79,9 @@ class SortedDocValuesWriter extends DocValuesWriter { @Override public void finish(int maxDoc) { while(pending.size() < maxDoc) { - addOneValue(EMPTY); + pending.add(EMPTY_ORD); } + updateBytesUsed(); } private void addOneValue(BytesRef value) { @@ -177,7 +178,7 @@ class SortedDocValuesWriter extends DocValuesWriter { // iterates over the ords for each doc we have in ram private class OrdsIterator implements Iterator { - final AppendingPackedLongBuffer.Iterator iter = pending.iterator(); + final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator(); final int ordMap[]; final int maxDoc; int docUpto; @@ -200,8 +201,7 @@ class SortedDocValuesWriter extends DocValuesWriter { } int ord = (int) iter.next(); docUpto++; - // TODO: make reusable Number - return ordMap[ord]; + return ord == -1 ? ord : ordMap[ord]; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index f87fb51acf3..cce1024ed5d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -104,26 +104,6 @@ public interface FieldCache { } }; } - - /** Returns MISSING/-1 ordinal for every document */ - public static final SortedDocValues EMPTY_TERMSINDEX = new SortedDocValues() { - @Override - public int getOrd(int docID) { - return -1; - } - - @Override - public void lookupOrd(int ord, BytesRef result) { - result.bytes = MISSING; - result.offset = 0; - result.length = 0; - } - - @Override - public int getValueCount() { - return 0; - } - }; /** * Placeholder indicating creation of this cache is currently in-progress. @@ -266,13 +246,10 @@ public interface FieldCache { } }; - /** Checks the internal cache for an appropriate entry, and if none is found, * reads the terms in field and returns a bit set at the size of * reader.maxDoc(), with turned on bits for each docid that - * does have a value for this field. Note that if the field was only indexed - * as DocValues then this method will not work (it will return a Bits stating - * that no documents contain the field). + * does have a value for this field. 
*/ public Bits getDocsWithField(AtomicReader reader, String field) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index 2dc5cb6bf9a..b8e81d17ea4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -501,8 +501,7 @@ class FieldCacheImpl implements FieldCache { // field does not exist or has no value return new Bits.MatchNoBits(reader.maxDoc()); } else if (fieldInfo.hasDocValues()) { - // doc values are dense - return new Bits.MatchAllBits(reader.maxDoc()); + return reader.getDocsWithField(field); } else if (!fieldInfo.isIndexed()) { return new Bits.MatchNoBits(reader.maxDoc()); } @@ -944,13 +943,13 @@ class FieldCacheImpl implements FieldCache { } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { - return EMPTY_TERMSINDEX; + return SortedDocValues.EMPTY; } else if (info.hasDocValues()) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { - return EMPTY_TERMSINDEX; + return SortedDocValues.EMPTY; } return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index 2c498ae7211..da1cf218617 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -569,7 +569,7 @@ public class TestFieldCache extends LuceneTestCase { assertEquals(2, sortedSet.getValueCount()); bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset"); - assertTrue(bits instanceof Bits.MatchAllBits); + assertTrue(bits.get(0)); } ir.close(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java index ad7cb27caca..80daa1cc588 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java @@ -31,15 +31,18 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; class Facet42DocValuesProducer extends DocValuesProducer { private final Map fields = new HashMap(); + private final int maxDoc; Facet42DocValuesProducer(SegmentReadState state) throws IOException { String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION); IndexInput in = state.directory.openInput(fileName, state.context); + this.maxDoc = state.segmentInfo.getDocCount(); boolean success = false; try { CodecUtil.checkHeader(in, Facet42DocValuesFormat.CODEC, @@ -80,6 +83,11 @@ class Facet42DocValuesProducer extends DocValuesProducer { throw new UnsupportedOperationException("FacetsDocValues only implements binary"); } + @Override + public Bits 
getDocsWithField(FieldInfo field) throws IOException { + return new Bits.MatchAllBits(maxDoc); // TODO: have codec impl this? + } + @Override public void close() throws IOException { } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/util/FacetsPayloadMigrationReader.java b/lucene/facet/src/java/org/apache/lucene/facet/util/FacetsPayloadMigrationReader.java index ec4d2c9d638..e3d998d5159 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/util/FacetsPayloadMigrationReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/util/FacetsPayloadMigrationReader.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; /** @@ -222,6 +223,18 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader { } } + @Override + public Bits getDocsWithField(String field) throws IOException { + Term term = fieldTerms.get(field); + if (term == null) { + return super.getDocsWithField(field); + } else { + // we shouldn't return null, even if the term does not exist or has no + // payloads, since we already marked the field as having DocValues. + return new Bits.MatchAllBits(maxDoc()); + } + } + @Override public FieldInfos getFieldInfos() { FieldInfos innerInfos = super.getFieldInfos(); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index a207b970de3..a2b1bd45b2d 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -422,6 +422,11 @@ public class WeightedSpanTermExtractor { public NumericDocValues getNormValues(String field) throws IOException { return super.getNormValues(FIELD_NAME); } + + @Override + public Bits getDocsWithField(String field) throws IOException { + return super.getDocsWithField(FIELD_NAME); + } } /** diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 8107aab3b68..3a0c6bbe62f 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -756,6 +756,11 @@ public class MemoryIndex { return null; } + @Override + public Bits getDocsWithField(String field) throws IOException { + return null; + } + private class MemoryFields extends Fields { @Override public Iterator iterator() { diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java index f7cce128692..469357dab63 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java @@ -221,6 +221,27 @@ public class SortingAtomicReader extends FilterAtomicReader { } } + private static class SortingBits implements Bits { + + private final Bits in; + private final Sorter.DocMap docMap; + + public SortingBits(final Bits in, Sorter.DocMap docMap) { + this.in = in; + this.docMap = docMap; + } + + @Override + public boolean get(int index) { + return in.get(docMap.newToOld(index)); + 
} + + @Override + public int length() { + return in.length(); + } + } + private static class SortingSortedDocValues extends SortedDocValues { private final SortedDocValues in; @@ -743,20 +764,9 @@ public class SortingAtomicReader extends FilterAtomicReader { final Bits inLiveDocs = in.getLiveDocs(); if (inLiveDocs == null) { return null; + } else { + return new SortingBits(inLiveDocs, docMap); } - return new Bits() { - - @Override - public boolean get(int index) { - return inLiveDocs.get(docMap.newToOld(index)); - } - - @Override - public int length() { - return inLiveDocs.length(); - } - - }; } @Override @@ -796,6 +806,16 @@ public class SortingAtomicReader extends FilterAtomicReader { } } + @Override + public Bits getDocsWithField(String field) throws IOException { + Bits bits = in.getDocsWithField(field); + if (bits == null || bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) { + return bits; + } else { + return new SortingBits(bits, docMap); + } + } + @Override public Fields getTermVectors(final int docID) throws IOException { return in.getTermVectors(docMap.newToOld(docID)); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java index 998bea7835f..a59e4c58c06 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java @@ -33,6 +33,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.OpenBitSet; @@ -75,11 +76,10 @@ public class AssertingDocValuesFormat extends DocValuesFormat { public void addNumericField(FieldInfo field, Iterable values) throws IOException { int count = 0; for (Number v : values) { - assert v != null; count++; } assert count == maxDoc; - checkIterator(values.iterator(), maxDoc); + checkIterator(values.iterator(), maxDoc, true); in.addNumericField(field, values); } @@ -87,12 +87,11 @@ public class AssertingDocValuesFormat extends DocValuesFormat { public void addBinaryField(FieldInfo field, Iterable values) throws IOException { int count = 0; for (BytesRef b : values) { - assert b != null; - assert b.isValid(); + assert b == null || b.isValid(); count++; } assert count == maxDoc; - checkIterator(values.iterator(), maxDoc); + checkIterator(values.iterator(), maxDoc, true); in.addBinaryField(field, values); } @@ -117,15 +116,17 @@ public class AssertingDocValuesFormat extends DocValuesFormat { for (Number v : docToOrd) { assert v != null; int ord = v.intValue(); - assert ord >= 0 && ord < valueCount; - seenOrds.set(ord); + assert ord >= -1 && ord < valueCount; + if (ord >= 0) { + seenOrds.set(ord); + } count++; } assert count == maxDoc; assert seenOrds.cardinality() == valueCount; - checkIterator(values.iterator(), valueCount); - checkIterator(docToOrd.iterator(), maxDoc); + checkIterator(values.iterator(), valueCount, false); + checkIterator(docToOrd.iterator(), maxDoc, false); in.addSortedField(field, values, docToOrd); } @@ -169,18 +170,18 @@ public class AssertingDocValuesFormat extends DocValuesFormat { assert docCount == maxDoc; assert 
seenOrds.cardinality() == valueCount; - checkIterator(values.iterator(), valueCount); - checkIterator(docToOrdCount.iterator(), maxDoc); - checkIterator(ords.iterator(), ordCount); + checkIterator(values.iterator(), valueCount, false); + checkIterator(docToOrdCount.iterator(), maxDoc, false); + checkIterator(ords.iterator(), ordCount, false); in.addSortedSetField(field, values, docToOrdCount, ords); } - private void checkIterator(Iterator iterator, long expectedSize) { + private void checkIterator(Iterator iterator, long expectedSize, boolean allowNull) { for (long i = 0; i < expectedSize; i++) { boolean hasNext = iterator.hasNext(); assert hasNext; T v = iterator.next(); - assert v != null; + assert allowNull || v != null; try { iterator.remove(); throw new AssertionError("broken iterator (supports remove): " + iterator); @@ -244,6 +245,15 @@ public class AssertingDocValuesFormat extends DocValuesFormat { assert values != null; return new AssertingAtomicReader.AssertingSortedSetDocValues(values, maxDoc); } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + assert field.getDocValuesType() != null; + Bits bits = in.getDocsWithField(field); + assert bits != null; + assert bits.length() == maxDoc; + return bits; // TODO: add AssertingBits w/ bounds check + } @Override public void close() throws IOException { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java index 52f36d2ae40..f6098dc1f21 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java @@ -27,6 +27,7 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.DocValuesProducer.SortedSetDocsWithField; import org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer; import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; import org.apache.lucene.index.BinaryDocValues; @@ -38,6 +39,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.BlockPackedReader; @@ -50,9 +52,11 @@ class CheapBastardDocValuesProducer extends DocValuesProducer { private final Map ordIndexes; private final Map binaries; private final IndexInput data; + private final int maxDoc; CheapBastardDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + this.maxDoc = state.segmentInfo.getDocCount(); // read in the entries from the metadata file. 
IndexInput in = state.directory.openInput(metaName, state.context); boolean success = false; @@ -380,6 +384,15 @@ class CheapBastardDocValuesProducer extends DocValuesProducer { } }; } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + } else { + return new Bits.MatchAllBits(maxDoc); + } + } @Override public void close() throws IOException { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java index 2769abd6e13..15b3081c848 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java @@ -24,7 +24,9 @@ import java.util.TreeSet; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.MissingOrdRemapper; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; @@ -54,7 +56,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { long minValue = Long.MAX_VALUE; long maxValue = Long.MIN_VALUE; for (Number n : values) { - long v = n.longValue(); + long v = n == null ? 0 : n.longValue(); minValue = Math.min(minValue, v); maxValue = Math.max(maxValue, v); } @@ -92,7 +94,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { Lucene40DocValuesFormat.INTS_VERSION_CURRENT); output.writeInt(1); // size for (Number n : values) { - output.writeByte(n.byteValue()); + output.writeByte(n == null ? 0 : n.byteValue()); } } @@ -103,7 +105,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { Lucene40DocValuesFormat.INTS_VERSION_CURRENT); output.writeInt(2); // size for (Number n : values) { - output.writeShort(n.shortValue()); + output.writeShort(n == null ? 0 : n.shortValue()); } } @@ -114,7 +116,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { Lucene40DocValuesFormat.INTS_VERSION_CURRENT); output.writeInt(4); // size for (Number n : values) { - output.writeInt(n.intValue()); + output.writeInt(n == null ? 0 : n.intValue()); } } @@ -131,7 +133,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { // writes longs output.writeByte(Lucene40DocValuesFormat.VAR_INTS_FIXED_64); for (Number n : values) { - output.writeLong(n.longValue()); + output.writeLong(n == null ? 0 : n.longValue()); } } else { // writes packed ints @@ -143,7 +145,8 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { PackedInts.bitsRequired(delta), PackedInts.DEFAULT); for (Number n : values) { - writer.add(n.longValue() - minValue); + long v = n == null ? 
0 : n.longValue(); + writer.add(v - minValue); } writer.finish(); } @@ -156,6 +159,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { int minLength = Integer.MAX_VALUE; int maxLength = Integer.MIN_VALUE; for (BytesRef b : values) { + if (b == null) { + b = new BytesRef(); // 4.0 doesnt distinguish + } minLength = Math.min(minLength, b.length); maxLength = Math.max(maxLength, b.length); if (uniqueValues != null) { @@ -243,7 +249,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { output.writeInt(length); for (BytesRef v : values) { - output.writeBytes(v.bytes, v.offset, v.length); + if (v != null) { + output.writeBytes(v.bytes, v.offset, v.length); + } } } @@ -264,7 +272,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { final long startPos = data.getFilePointer(); for (BytesRef v : values) { - data.writeBytes(v.bytes, v.offset, v.length); + if (v != null) { + data.writeBytes(v.bytes, v.offset, v.length); + } } /* addresses */ @@ -279,7 +289,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { long currentPosition = 0; for (BytesRef v : values) { w.add(currentPosition); - currentPosition += v.length; + if (v != null) { + currentPosition += v.length; + } } // write sentinel assert currentPosition == maxAddress; @@ -301,7 +313,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { // deduplicate TreeSet dictionary = new TreeSet(); for (BytesRef v : values) { - dictionary.add(BytesRef.deepCopyOf(v)); + dictionary.add(v == null ? new BytesRef() : BytesRef.deepCopyOf(v)); } /* values */ @@ -318,6 +330,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT); for (BytesRef v : values) { + if (v == null) { + v = new BytesRef(); + } int ord = dictionary.headSet(v).size(); w.add(ord); } @@ -338,7 +353,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { // deduplicate TreeSet dictionary = new TreeSet(); for (BytesRef v : values) { - dictionary.add(BytesRef.deepCopyOf(v)); + dictionary.add(v == null ? new BytesRef() : BytesRef.deepCopyOf(v)); } /* values */ @@ -359,7 +374,7 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(currentAddress), PackedInts.DEFAULT); for (BytesRef v : values) { - w.add(valueToAddress.get(v)); + w.add(valueToAddress.get(v == null ? new BytesRef() : v)); } w.finish(); } @@ -385,6 +400,15 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { maxLength = Math.max(maxLength, b.length); } + // but dont use fixed if there are missing values (we are simulating how lucene40 wrote dv...) + boolean anyMissing = false; + for (Number n : docToOrd) { + if (n.longValue() == -1) { + anyMissing = true; + break; + } + } + boolean success = false; IndexOutput data = null; IndexOutput index = null; @@ -394,12 +418,22 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { try { data = dir.createOutput(dataName, state.context); index = dir.createOutput(indexName, state.context); - if (minLength == maxLength) { + if (minLength == maxLength && !anyMissing) { // fixed byte[] addFixedSortedBytesField(field, data, index, values, docToOrd, minLength); } else { // var byte[] - addVarSortedBytesField(field, data, index, values, docToOrd); + // three cases for simulating the old writer: + // 1. no missing + // 2. missing (and empty string in use): remap ord=-1 -> ord=0 + // 3. 
missing (and empty string not in use): remap all ords +1, insert empty string into values + if (!anyMissing) { + addVarSortedBytesField(field, data, index, values, docToOrd); + } else if (minLength == 0) { + addVarSortedBytesField(field, data, index, values, MissingOrdRemapper.mapMissingToOrd0(docToOrd)); + } else { + addVarSortedBytesField(field, data, index, MissingOrdRemapper.insertEmptyValue(values), MissingOrdRemapper.mapAllOrds(docToOrd)); + } } success = true; } finally { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 81c5cbef204..7dd38ae1c69 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -52,6 +52,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.LuceneTestCase; @@ -1073,8 +1074,10 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { doc.add(newTextField("id", "noValue", Field.Store.YES)); w.addDocument(doc); } - BytesRef bytesRef = new BytesRef(); - hash.add(bytesRef); // add empty value for the gaps + if (!codecSupportsDocsWithField("field")) { + BytesRef bytesRef = new BytesRef(); + hash.add(bytesRef); // add empty value for the gaps + } if (rarely()) { w.commit(); } @@ -2197,5 +2200,205 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { doTestNumericsVsStoredFields(longs); } } + + public void testTwoNumbersOneMissing() throws IOException { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.YES)); + doc.add(new NumericDocValuesField("dv1", 0)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.YES)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + IndexReader ir = DirectoryReader.open(directory); + assertEquals(1, ir.leaves().size()); + AtomicReader ar = ir.leaves().get(0).reader(); + NumericDocValues dv = ar.getNumericDocValues("dv1"); + assertEquals(0, dv.get(0)); + assertEquals(0, dv.get(1)); + Bits docsWithField = ar.getDocsWithField("dv1"); + assertTrue(docsWithField.get(0)); + assertFalse(docsWithField.get(1)); + ir.close(); + directory.close(); + } + + public void testTwoNumbersOneMissingWithMerging() throws IOException { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.YES)); + doc.add(new NumericDocValuesField("dv1", 0)); + iw.addDocument(doc); + iw.commit(); + doc = new Document(); + 
doc.add(new StringField("id", "1", Field.Store.YES)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + IndexReader ir = DirectoryReader.open(directory); + assertEquals(1, ir.leaves().size()); + AtomicReader ar = ir.leaves().get(0).reader(); + NumericDocValues dv = ar.getNumericDocValues("dv1"); + assertEquals(0, dv.get(0)); + assertEquals(0, dv.get(1)); + Bits docsWithField = ar.getDocsWithField("dv1"); + assertTrue(docsWithField.get(0)); + assertFalse(docsWithField.get(1)); + ir.close(); + directory.close(); + } + + public void testThreeNumbersOneMissingWithMerging() throws IOException { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.YES)); + doc.add(new NumericDocValuesField("dv1", 0)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.YES)); + iw.addDocument(doc); + iw.commit(); + doc = new Document(); + doc.add(new StringField("id", "2", Field.Store.YES)); + doc.add(new NumericDocValuesField("dv1", 5)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + IndexReader ir = DirectoryReader.open(directory); + assertEquals(1, ir.leaves().size()); + AtomicReader ar = ir.leaves().get(0).reader(); + NumericDocValues dv = ar.getNumericDocValues("dv1"); + assertEquals(0, dv.get(0)); + assertEquals(0, dv.get(1)); + assertEquals(5, dv.get(2)); + Bits docsWithField = ar.getDocsWithField("dv1"); + assertTrue(docsWithField.get(0)); + assertFalse(docsWithField.get(1)); + assertTrue(docsWithField.get(2)); + ir.close(); + directory.close(); + } + + public void testTwoBytesOneMissing() throws IOException { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.YES)); + doc.add(new BinaryDocValuesField("dv1", new BytesRef())); + iw.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.YES)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + IndexReader ir = DirectoryReader.open(directory); + assertEquals(1, ir.leaves().size()); + AtomicReader ar = ir.leaves().get(0).reader(); + BinaryDocValues dv = ar.getBinaryDocValues("dv1"); + BytesRef ref = new BytesRef(); + dv.get(0, ref); + assertEquals(new BytesRef(), ref); + dv.get(1, ref); + assertEquals(new BytesRef(), ref); + Bits docsWithField = ar.getDocsWithField("dv1"); + assertTrue(docsWithField.get(0)); + assertFalse(docsWithField.get(1)); + ir.close(); + directory.close(); + } + + public void testTwoBytesOneMissingWithMerging() throws IOException { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new StringField("id", 
"0", Field.Store.YES)); + doc.add(new BinaryDocValuesField("dv1", new BytesRef())); + iw.addDocument(doc); + iw.commit(); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.YES)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + IndexReader ir = DirectoryReader.open(directory); + assertEquals(1, ir.leaves().size()); + AtomicReader ar = ir.leaves().get(0).reader(); + BinaryDocValues dv = ar.getBinaryDocValues("dv1"); + BytesRef ref = new BytesRef(); + dv.get(0, ref); + assertEquals(new BytesRef(), ref); + dv.get(1, ref); + assertEquals(new BytesRef(), ref); + Bits docsWithField = ar.getDocsWithField("dv1"); + assertTrue(docsWithField.get(0)); + assertFalse(docsWithField.get(1)); + ir.close(); + directory.close(); + } + + public void testThreeBytesOneMissingWithMerging() throws IOException { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.YES)); + doc.add(new BinaryDocValuesField("dv1", new BytesRef())); + iw.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.YES)); + iw.addDocument(doc); + iw.commit(); + doc = new Document(); + doc.add(new StringField("id", "2", Field.Store.YES)); + doc.add(new BinaryDocValuesField("dv1", new BytesRef("boo"))); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + IndexReader ir = DirectoryReader.open(directory); + assertEquals(1, ir.leaves().size()); + AtomicReader ar = ir.leaves().get(0).reader(); + BinaryDocValues dv = ar.getBinaryDocValues("dv1"); + BytesRef ref = new BytesRef(); + dv.get(0, ref); + assertEquals(new BytesRef(), ref); + dv.get(1, ref); + assertEquals(new BytesRef(), ref); + dv.get(2, ref); + assertEquals(new BytesRef("boo"), ref); + Bits docsWithField = ar.getDocsWithField("dv1"); + assertTrue(docsWithField.get(0)); + assertFalse(docsWithField.get(1)); + assertTrue(docsWithField.get(2)); + ir.close(); + directory.close(); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java index a0a2521ef7d..66c24874b55 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.Set; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FilterIterator; /** @@ -136,6 +137,11 @@ public final class FieldFilterAtomicReader extends FilterAtomicReader { return hasField(field) ? super.getNormValues(field) : null; } + @Override + public Bits getDocsWithField(String field) throws IOException { + return hasField(field) ? 
super.getDocsWithField(field) : null; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("FieldFilterAtomicReader(reader="); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 3060d683c26..f14f772e0df 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -1368,6 +1368,13 @@ public abstract class LuceneTestCase extends Assert { } return true; } + + /** Returns true if the codec for the field "supports" docsWithField + * (other codecs return MatchAllBits, because you couldnt write missing values before) */ + public static boolean codecSupportsDocsWithField(String field) { + // currently only one codec! + return _TestUtil.getDocValuesFormat(Codec.getDefault(), field).equals("SimpleText"); + } public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException { assertReaderStatisticsEquals(info, leftReader, rightReader); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index e68d42c2bf0..38b2592fdeb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -45,6 +45,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; @@ -742,6 +743,15 @@ public class _TestUtil { return p.getName(); } } + + public static String getDocValuesFormat(Codec codec, String field) { + DocValuesFormat d = codec.docValuesFormat(); + if (d instanceof PerFieldDocValuesFormat) { + return ((PerFieldDocValuesFormat)d).getDocValuesFormatForField(field).getName(); + } else { + return d.getName(); + } + } public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException { String[] files = dir.listAll(); diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index 4dfc114a189..bca44a19d76 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -221,6 +221,7 @@ public class DocValuesFacets { if (schemaField.multiValued()) { missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName()); } else { + // nocommit: support missing count (ord = -1) for single-valued here. 
missingCount = 0; // single-valued dv is implicitly 0 } } diff --git a/solr/core/src/test/org/apache/solr/search/TestDocSet.java b/solr/core/src/test/org/apache/solr/search/TestDocSet.java index faebe6c261b..c2a282066e5 100644 --- a/solr/core/src/test/org/apache/solr/search/TestDocSet.java +++ b/solr/core/src/test/org/apache/solr/search/TestDocSet.java @@ -403,6 +403,11 @@ public class TestDocSet extends LuceneTestCase { return null; } + @Override + public Bits getDocsWithField(String field) throws IOException { + return null; + } + @Override public NumericDocValues getNormValues(String field) { return null; From 2c6bf041900192ff7ec58b0d2bc0c8dcfae85b5c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 16 Aug 2013 21:19:19 +0000 Subject: [PATCH 03/16] bump 4.5 codec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1514897 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/diskdv/DiskDocValuesFormat.java | 17 +- .../codecs/diskdv/DiskDocValuesProducer.java | 433 ++-------- .../codecs/diskdv/DiskNormsFormat.java} | 18 +- .../java/org/apache/lucene/codecs/Codec.java | 2 +- .../org/apache/lucene/codecs/FilterCodec.java | 4 +- .../lucene/codecs/lucene40/Lucene40Codec.java | 1 - .../lucene/codecs/lucene42/package.html | 8 +- .../lucene/codecs/lucene45/Lucene45Codec.java | 141 ++++ .../lucene45/Lucene45DocValuesConsumer.java} | 24 +- .../lucene45/Lucene45DocValuesFormat.java | 167 ++++ .../lucene45/Lucene45DocValuesProducer.java | 755 ++++++++++++++++++ .../lucene/codecs/lucene45/package.html | 396 +++++++++ .../org/apache/lucene/codecs/package.html | 8 +- .../org/apache/lucene/index/CheckIndex.java | 2 +- .../services/org.apache.lucene.codecs.Codec | 1 + .../org.apache.lucene.codecs.DocValuesFormat | 1 + .../org/apache/lucene/TestExternalCodecs.java | 24 +- .../TestLucene45DocValuesFormat.java} | 9 +- .../perfield/TestPerFieldDocValuesFormat.java | 6 +- .../perfield/TestPerFieldPostingsFormat2.java | 10 +- .../apache/lucene/index/TestAddIndexes.java | 6 +- .../index/TestAllFilesHaveCodecHeader.java | 4 +- .../lucene/index/TestDuelingCodecs.java | 2 +- .../lucene/util/TestNamedSPILoader.java | 6 +- .../facet/codecs/facet42/Facet42Codec.java | 1 + .../facet42/Facet42DocValuesConsumer.java | 12 +- .../valuesource/DoubleFieldSource.java | 4 +- .../valuesource/FloatFieldSource.java | 4 +- .../function/valuesource/IntFieldSource.java | 4 +- .../function/valuesource/LongFieldSource.java | 4 +- lucene/site/xsl/index.xsl | 2 +- .../bbox/BBoxSimilarityValueSource.java | 6 +- .../analyzing/AnalyzingInfixSuggester.java | 4 +- .../codecs/asserting/AssertingCodec.java | 6 +- .../asserting/AssertingDocValuesFormat.java | 6 +- .../asserting/AssertingNormsFormat.java | 1 + .../cheapbastard/CheapBastardCodec.java | 10 +- .../CheapBastardDocValuesFormat.java | 74 -- .../CheapBastardDocValuesProducer.java | 444 ---------- .../codecs/compressing/CompressingCodec.java | 6 +- .../compressing/FastCompressingCodec.java | 4 +- .../FastDecompressionCompressingCodec.java | 1 + .../HighCompressionCompressingCodec.java | 1 + .../index/BaseStoredFieldsFormatTestCase.java | 4 +- .../org/apache/lucene/index/RandomCodec.java | 12 +- .../util/TestRuleSetupAndRestoreClassEnv.java | 4 +- .../org/apache/lucene/util/_TestUtil.java | 11 +- .../org.apache.lucene.codecs.DocValuesFormat | 1 - .../apache/solr/core/SchemaCodecFactory.java | 4 +- .../apache/solr/request/NumericFacets.java | 5 +- .../solr/collection1/conf/schema_codec.xml | 2 +- .../apache/solr/core/TestCodecSupport.java | 8 +- 52 files 
changed, 1673 insertions(+), 1017 deletions(-) rename lucene/{test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardNormsFormat.java => codecs/src/java/org/apache/lucene/codecs/diskdv/DiskNormsFormat.java} (70%) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java rename lucene/{codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java => core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java} (93%) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html rename lucene/{codecs/src/test/org/apache/lucene/codecs/diskdv/TestCheapBastardDocValuesFormat.java => core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java} (79%) delete mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesFormat.java delete mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesFormat.java index 43a7d57eecf..f3fd35e6554 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesFormat.java @@ -22,8 +22,11 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.util.BytesRef; /** * DocValues format that keeps most things on disk. 
@@ -40,7 +43,12 @@ public final class DiskDocValuesFormat extends DocValuesFormat { @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - return new DiskDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); + return new Lucene45DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION) { + @Override + protected void addTermsDict(FieldInfo field, Iterable values) throws IOException { + addBinaryField(field, values); + } + }; } @Override @@ -52,11 +60,4 @@ public final class DiskDocValuesFormat extends DocValuesFormat { public static final String DATA_EXTENSION = "dvdd"; public static final String META_CODEC = "DiskDocValuesMetadata"; public static final String META_EXTENSION = "dvdm"; - public static final int VERSION_START = 0; - public static final int VERSION_COMPRESSED_TERMS = 1; - public static final int VERSION_CURRENT = VERSION_COMPRESSED_TERMS; - public static final byte NUMERIC = 0; - public static final byte BINARY = 1; - public static final byte SORTED = 2; - public static final byte SORTED_SET = 3; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java index 11a60fdaf6c..41d2e87b9fe 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java @@ -17,35 +17,26 @@ package org.apache.lucene.codecs.diskdv; * limitations under the License. */ -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.TABLE_COMPRESSED; - -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.BINARY_PREFIX_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED; import java.io.IOException; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.DocValuesProducer.SortedSetDocsWithField; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; 
import org.apache.lucene.util.BytesRef; @@ -56,32 +47,28 @@ import org.apache.lucene.util.packed.PackedInts; class DiskDocValuesProducer extends DocValuesProducer { private final Map numerics; - private final Map binaries; private final Map ords; private final Map ordIndexes; + private final Map binaries; private final IndexInput data; private final int maxDoc; - - // memory-resident structures - private final Map addressInstances = new HashMap(); - private final Map ordIndexInstances = new HashMap(); DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + this.maxDoc = state.segmentInfo.getDocCount(); // read in the entries from the metadata file. IndexInput in = state.directory.openInput(metaName, state.context); - this.maxDoc = state.segmentInfo.getDocCount(); boolean success = false; final int version; try { version = CodecUtil.checkHeader(in, metaCodec, - DiskDocValuesFormat.VERSION_CURRENT, - DiskDocValuesFormat.VERSION_CURRENT); + Lucene45DocValuesFormat.VERSION_CURRENT, + Lucene45DocValuesFormat.VERSION_CURRENT); numerics = new HashMap(); ords = new HashMap(); ordIndexes = new HashMap(); binaries = new HashMap(); - readFields(in, state.fieldInfos); + readFields(in); success = true; } finally { @@ -97,10 +84,10 @@ class DiskDocValuesProducer extends DocValuesProducer { String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.openInput(dataName, state.context); final int version2 = CodecUtil.checkHeader(data, dataCodec, - DiskDocValuesFormat.VERSION_CURRENT, - DiskDocValuesFormat.VERSION_CURRENT); + Lucene45DocValuesFormat.VERSION_CURRENT, + Lucene45DocValuesFormat.VERSION_CURRENT); if (version != version2) { - throw new CorruptIndexException("Format versions mismatch"); + throw new CorruptIndexException("Versions mismatch"); } success = true; @@ -109,61 +96,62 @@ class DiskDocValuesProducer extends DocValuesProducer { IOUtils.closeWhileHandlingException(this.data); } } + } - private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + private void readFields(IndexInput meta) throws IOException { int fieldNumber = meta.readVInt(); while (fieldNumber != -1) { byte type = meta.readByte(); - if (type == DiskDocValuesFormat.NUMERIC) { + if (type == Lucene45DocValuesFormat.NUMERIC) { numerics.put(fieldNumber, readNumericEntry(meta)); - } else if (type == DiskDocValuesFormat.BINARY) { + } else if (type == Lucene45DocValuesFormat.BINARY) { BinaryEntry b = readBinaryEntry(meta); binaries.put(fieldNumber, b); - } else if (type == DiskDocValuesFormat.SORTED) { + } else if (type == Lucene45DocValuesFormat.SORTED) { // sorted = binary + numeric if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } - if (meta.readByte() != DiskDocValuesFormat.BINARY) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + if (meta.readByte() != Lucene45DocValuesFormat.BINARY) { + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } BinaryEntry b = readBinaryEntry(meta); binaries.put(fieldNumber, b); if (meta.readVInt() != 
fieldNumber) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } - if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } NumericEntry n = readNumericEntry(meta); ords.put(fieldNumber, n); - } else if (type == DiskDocValuesFormat.SORTED_SET) { + } else if (type == Lucene45DocValuesFormat.SORTED_SET) { // sortedset = binary + numeric + ordIndex if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } - if (meta.readByte() != DiskDocValuesFormat.BINARY) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + if (meta.readByte() != Lucene45DocValuesFormat.BINARY) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } BinaryEntry b = readBinaryEntry(meta); binaries.put(fieldNumber, b); if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } - if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } NumericEntry n1 = readNumericEntry(meta); ords.put(fieldNumber, n1); if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } - if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } NumericEntry n2 = readNumericEntry(meta); ordIndexes.put(fieldNumber, n2); @@ -209,27 +197,18 @@ class DiskDocValuesProducer extends DocValuesProducer { static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { BinaryEntry entry = new BinaryEntry(); - entry.format = meta.readVInt(); + int format = meta.readVInt(); + if (format != Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED && format != Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED) { + throw new CorruptIndexException("Unexpected format for binary entry: " + format + ", input=" + meta); + } entry.minLength = meta.readVInt(); entry.maxLength = meta.readVInt(); entry.count = meta.readVLong(); entry.offset = meta.readLong(); - switch(entry.format) { - case BINARY_FIXED_UNCOMPRESSED: - break; - case BINARY_PREFIX_COMPRESSED: - entry.addressInterval = meta.readVInt(); - 
entry.addressesOffset = meta.readLong(); - entry.packedIntsVersion = meta.readVInt(); - entry.blockSize = meta.readVInt(); - break; - case BINARY_VARIABLE_UNCOMPRESSED: - entry.addressesOffset = meta.readLong(); - entry.packedIntsVersion = meta.readVInt(); - entry.blockSize = meta.readVInt(); - break; - default: - throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); + if (entry.minLength != entry.maxLength) { + entry.addressesOffset = meta.readLong(); + entry.packedIntsVersion = meta.readVInt(); + entry.blockSize = meta.readVInt(); } return entry; } @@ -237,10 +216,10 @@ class DiskDocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.number); - return getNumeric(entry); + return getNumeric(field, entry); } - LongNumericDocValues getNumeric(NumericEntry entry) throws IOException { + private LongNumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException { final IndexInput data = this.data.clone(); data.seek(entry.offset); @@ -264,12 +243,12 @@ class DiskDocValuesProducer extends DocValuesProducer { } }; case TABLE_COMPRESSED: - final long table[] = entry.table; + final long[] table = entry.table; final int bitsRequired = PackedInts.bitsRequired(table.length - 1); final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired); return new LongNumericDocValues() { @Override - public long get(long id) { + long get(long id) { return table[(int) ords.get((int) id)]; } }; @@ -281,15 +260,10 @@ class DiskDocValuesProducer extends DocValuesProducer { @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { BinaryEntry bytes = binaries.get(field.number); - switch(bytes.format) { - case BINARY_FIXED_UNCOMPRESSED: - return getFixedBinary(field, bytes); - case BINARY_VARIABLE_UNCOMPRESSED: - return getVariableBinary(field, bytes); - case BINARY_PREFIX_COMPRESSED: - return getCompressedBinary(field, bytes); - default: - throw new AssertionError(); + if (bytes.minLength == bytes.maxLength) { + return getFixedBinary(field, bytes); + } else { + return getVariableBinary(field, bytes); } } @@ -318,22 +292,13 @@ class DiskDocValuesProducer extends DocValuesProducer { private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { final IndexInput data = this.data.clone(); - - final MonotonicBlockPackedReader addresses; - synchronized (addressInstances) { - MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number); - if (addrInstance == null) { - data.seek(bytes.addressesOffset); - addrInstance = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, false); - addressInstances.put(field.number, addrInstance); - } - addresses = addrInstance; - } + data.seek(bytes.addressesOffset); + final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true); return new LongBinaryDocValues() { @Override public void get(long id, BytesRef result) { - long startAddress = bytes.offset + (id == 0 ? 0 : addresses.get(id-1)); + long startAddress = bytes.offset + (id == 0 ? 
0 : + addresses.get(id-1)); long endAddress = bytes.offset + addresses.get(id); int length = (int) (endAddress - startAddress); try { @@ -352,39 +317,11 @@ class DiskDocValuesProducer extends DocValuesProducer { }; } - private BinaryDocValues getCompressedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { - final IndexInput data = this.data.clone(); - final long interval = bytes.addressInterval; - - final MonotonicBlockPackedReader addresses; - synchronized (addressInstances) { - MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number); - if (addrInstance == null) { - data.seek(bytes.addressesOffset); - final long size; - if (bytes.count % interval == 0) { - size = bytes.count / interval; - } else { - size = 1L + bytes.count / interval; - } - addrInstance = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, size, false); - addressInstances.put(field.number, addrInstance); - } - addresses = addrInstance; - } - - return new CompressedBinaryDocValues(bytes, addresses, data); - } - @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { final int valueCount = (int) binaries.get(field.number).count; final BinaryDocValues binary = getBinary(field); - NumericEntry entry = ords.get(field.number); - IndexInput data = this.data.clone(); - data.seek(entry.offset); - final BlockPackedReader ordinals = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - + final NumericDocValues ordinals = getNumeric(field, ords.get(field.number)); return new SortedDocValues() { @Override @@ -401,46 +338,18 @@ class DiskDocValuesProducer extends DocValuesProducer { public int getValueCount() { return valueCount; } - - @Override - public int lookupTerm(BytesRef key) { - if (binary instanceof CompressedBinaryDocValues) { - return (int) ((CompressedBinaryDocValues)binary).lookupTerm(key); - } else { - return super.lookupTerm(key); - } - } - - @Override - public TermsEnum termsEnum() { - if (binary instanceof CompressedBinaryDocValues) { - return ((CompressedBinaryDocValues)binary).getTermsEnum(); - } else { - return super.termsEnum(); - } - } }; } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { final long valueCount = binaries.get(field.number).count; - // we keep the byte[]s and list of ords on disk, these could be large final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); - final LongNumericDocValues ordinals = getNumeric(ords.get(field.number)); - // but the addresses to the ord stream are in RAM - final MonotonicBlockPackedReader ordIndex; - synchronized (ordIndexInstances) { - MonotonicBlockPackedReader ordIndexInstance = ordIndexInstances.get(field.number); - if (ordIndexInstance == null) { - NumericEntry entry = ordIndexes.get(field.number); - IndexInput data = this.data.clone(); - data.seek(entry.offset); - ordIndexInstance = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false); - ordIndexInstances.put(field.number, ordIndexInstance); - } - ordIndex = ordIndexInstance; - } + final LongNumericDocValues ordinals = getNumeric(field, ords.get(field.number)); + NumericEntry entry = ordIndexes.get(field.number); + IndexInput data = this.data.clone(); + data.seek(entry.offset); + final MonotonicBlockPackedReader ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); return new SortedSetDocValues() { long offset; @@ -472,31 +381,11 @@ 
class DiskDocValuesProducer extends DocValuesProducer { public long getValueCount() { return valueCount; } - - @Override - public long lookupTerm(BytesRef key) { - if (binary instanceof CompressedBinaryDocValues) { - return ((CompressedBinaryDocValues)binary).lookupTerm(key); - } else { - return super.lookupTerm(key); - } - } - - @Override - public TermsEnum termsEnum() { - if (binary instanceof CompressedBinaryDocValues) { - return ((CompressedBinaryDocValues)binary).getTermsEnum(); - } else { - return super.termsEnum(); - } - } }; } - + @Override public Bits getDocsWithField(FieldInfo field) throws IOException { - // nocommit: only use this if the field's entry has missing values (write that), - // otherwise return MatchAllBits if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { return new SortedSetDocsWithField(getSortedSet(field), maxDoc); } else { @@ -525,12 +414,10 @@ class DiskDocValuesProducer extends DocValuesProducer { static class BinaryEntry { long offset; - int format; long count; int minLength; int maxLength; long addressesOffset; - long addressInterval; int packedIntsVersion; int blockSize; } @@ -553,204 +440,4 @@ class DiskDocValuesProducer extends DocValuesProducer { abstract void get(long id, BytesRef Result); } - - // in the compressed case, we add a few additional operations for - // more efficient reverse lookup and enumeration - static class CompressedBinaryDocValues extends LongBinaryDocValues { - final BinaryEntry bytes; - final long interval; - final long numValues; - final long numIndexValues; - final MonotonicBlockPackedReader addresses; - final IndexInput data; - final TermsEnum termsEnum; - - public CompressedBinaryDocValues(BinaryEntry bytes, MonotonicBlockPackedReader addresses, IndexInput data) throws IOException { - this.bytes = bytes; - this.interval = bytes.addressInterval; - this.addresses = addresses; - this.data = data; - this.numValues = bytes.count; - this.numIndexValues = addresses.size(); - this.termsEnum = getTermsEnum(data); - } - - @Override - public void get(long id, BytesRef result) { - try { - termsEnum.seekExact(id); - BytesRef term = termsEnum.term(); - result.bytes = term.bytes; - result.offset = term.offset; - result.length = term.length; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - long lookupTerm(BytesRef key) { - try { - SeekStatus status = termsEnum.seekCeil(key); - if (status == SeekStatus.END) { - return -numValues-1; - } else if (status == SeekStatus.FOUND) { - return termsEnum.ord(); - } else { - return -termsEnum.ord()-1; - } - } catch (IOException bogus) { - throw new RuntimeException(bogus); - } - } - - TermsEnum getTermsEnum() { - try { - return getTermsEnum(data.clone()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private TermsEnum getTermsEnum(final IndexInput input) throws IOException { - input.seek(bytes.offset); - - return new TermsEnum() { - private long currentOrd = -1; - // TODO: maxLength is negative when all terms are merged away... - private final BytesRef termBuffer = new BytesRef(bytes.maxLength < 0 ? 0 : bytes.maxLength); - private final BytesRef term = new BytesRef(); // TODO: paranoia? 
- - @Override - public BytesRef next() throws IOException { - if (doNext() == null) { - return null; - } else { - setTerm(); - return term; - } - } - - private BytesRef doNext() throws IOException { - if (++currentOrd >= numValues) { - return null; - } else { - int start = input.readVInt(); - int suffix = input.readVInt(); - input.readBytes(termBuffer.bytes, start, suffix); - termBuffer.length = start + suffix; - return termBuffer; - } - } - - @Override - public SeekStatus seekCeil(BytesRef text) throws IOException { - // binary-search just the index values to find the block, - // then scan within the block - long low = 0; - long high = numIndexValues-1; - - while (low <= high) { - long mid = (low + high) >>> 1; - doSeek(mid * interval); - int cmp = termBuffer.compareTo(text); - - if (cmp < 0) { - low = mid + 1; - } else if (cmp > 0) { - high = mid - 1; - } else { - // we got lucky, found an indexed term - setTerm(); - return SeekStatus.FOUND; - } - } - - if (numIndexValues == 0) { - return SeekStatus.END; - } - - // block before insertion point - long block = low-1; - doSeek(block < 0 ? -1 : block * interval); - - while (doNext() != null) { - int cmp = termBuffer.compareTo(text); - if (cmp == 0) { - setTerm(); - return SeekStatus.FOUND; - } else if (cmp > 0) { - setTerm(); - return SeekStatus.NOT_FOUND; - } - } - - return SeekStatus.END; - } - - @Override - public void seekExact(long ord) throws IOException { - doSeek(ord); - setTerm(); - } - - private void doSeek(long ord) throws IOException { - long block = ord / interval; - - if (ord >= currentOrd && block == currentOrd / interval) { - // seek within current block - } else { - // position before start of block - currentOrd = ord - ord % interval - 1; - input.seek(bytes.offset + addresses.get(block)); - } - - while (currentOrd < ord) { - doNext(); - } - } - - private void setTerm() { - // TODO: is there a cleaner way - term.bytes = new byte[termBuffer.length]; - term.offset = 0; - term.copyBytes(termBuffer); - } - - @Override - public BytesRef term() throws IOException { - return term; - } - - @Override - public long ord() throws IOException { - return currentOrd; - } - - @Override - public Comparator getComparator() { - return BytesRef.getUTF8SortedAsUnicodeComparator(); - } - - @Override - public int docFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long totalTermFreq() throws IOException { - return -1; - } - - @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - throw new UnsupportedOperationException(); - } - }; - } - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardNormsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskNormsFormat.java similarity index 70% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardNormsFormat.java rename to lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskNormsFormat.java index 5834f9c1f71..63a8ab9c3db 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardNormsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskNormsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.cheapbastard; +package org.apache.lucene.codecs.diskdv; /* * 
Licensed to the Apache Software Foundation (ASF) under one or more @@ -22,25 +22,25 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; /** Norms format that keeps all norms on disk */ -public final class CheapBastardNormsFormat extends NormsFormat { +public final class DiskNormsFormat extends NormsFormat { @Override public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { - return new DiskDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); + return new Lucene45DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); } @Override public DocValuesProducer normsProducer(SegmentReadState state) throws IOException { - return new CheapBastardDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); + return new DiskDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); } - static final String DATA_CODEC = "CheapBastardNormsData"; - static final String DATA_EXTENSION = "cbnd"; - static final String META_CODEC = "CheapBastardNormsMetadata"; - static final String META_EXTENSION = "cbnm"; + static final String DATA_CODEC = "DiskNormsData"; + static final String DATA_EXTENSION = "dnvd"; + static final String META_CODEC = "DiskNormsMetadata"; + static final String META_EXTENSION = "dnvm"; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java index 1b2726f06fd..3b98449eb5c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java @@ -119,7 +119,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { loader.reload(classloader); } - private static Codec defaultCodec = Codec.forName("Lucene42"); + private static Codec defaultCodec = Codec.forName("Lucene45"); /** expert: returns the default codec used for newly created * {@link IndexWriterConfig}s. diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java index fc4728ae846..d97d577de22 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java @@ -21,13 +21,13 @@ package org.apache.lucene.codecs; * A codec that forwards all its method calls to another codec. *

 * Extend this class when you need to reuse the functionality of an existing
- * codec. For example, if you want to build a codec that redefines Lucene42's
+ * codec. For example, if you want to build a codec that redefines Lucene45's
  * {@link LiveDocsFormat}:

  *   public final class CustomCodec extends FilterCodec {
  *
  *     public CustomCodec() {
- *       super("CustomCodec", new Lucene42Codec());
+ *       super("CustomCodec", new Lucene45Codec());
  *     }
  *
  *     public LiveDocsFormat liveDocsFormat() {
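The javadoc example in the hunk above is cut off mid-method by the patch context. For readability, here is a complete sketch of the pattern it describes; this is our illustration, not code from the patch. Lucene40LiveDocsFormat stands in for whatever custom LiveDocsFormat a real codec would supply, and looking the codec up by name would additionally require an SPI registration in META-INF/services/org.apache.lucene.codecs.Codec.

  package com.example; // hypothetical package for this sketch

  import org.apache.lucene.codecs.FilterCodec;
  import org.apache.lucene.codecs.LiveDocsFormat;
  import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
  import org.apache.lucene.codecs.lucene45.Lucene45Codec;

  public final class CustomCodec extends FilterCodec {

    public CustomCodec() {
      // delegate everything to Lucene45Codec, then override one format below
      super("CustomCodec", new Lucene45Codec());
    }

    @Override
    public LiveDocsFormat liveDocsFormat() {
      // stand-in: a real custom codec would return its own LiveDocsFormat here
      return new Lucene40LiveDocsFormat();
    }
  }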
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
index c7911f47dac..0e7ac44d02c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
@@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 
 /**
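Because the Codec.java hunk above changes what Codec.getDefault() returns, existing applications silently start writing Lucene45 segments. A minimal sketch (assuming the released 4.5 APIs and constants, which this branch may predate) of pinning the codec explicitly rather than relying on the default:

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.codecs.Codec;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.util.Version;

  // Codec.forName goes through the SPI registry, so "Lucene45" resolves once
  // the codec is listed in META-INF/services/org.apache.lucene.codecs.Codec
  // (this patch adds that entry). Omitting setCodec picks the same new default.
  static IndexWriterConfig newConfig(Analyzer analyzer) {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45, analyzer);
    iwc.setCodec(Codec.forName("Lucene45"));
    return iwc;
  }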
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
index 571b7668c41..ae55e7a63de 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
@@ -178,7 +178,7 @@ For each field in each document, a value is stored
 that is multiplied into the score for hits on that field.
 
 
-<li>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}.
+<li>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}.
 For each field in each document, the term vector (sometimes called document vector)
 may be stored. A term vector consists of term text and term frequency. To add
 Term Vectors to your index see the
@@ -299,17 +299,17 @@ systems that frequently run out of file handles.
 Encodes additional scoring factors or other per-document information.
-{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}
+{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}
 .tvx
 Stores offset into the document data file
-{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}
+{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}
 .tvd
 Contains information about each document that has term vectors
-{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}
+{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}
 .tvf
 The field level info about term vectors
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java
new file mode 100644
index 00000000000..5ec25e1003c
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java
@@ -0,0 +1,141 @@
+package org.apache.lucene.codecs.lucene45;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 4.5 index format, with configurable per-field postings
+ * and docvalues formats.
+ *

    + * If you want to reuse functionality of this codec in another codec, extend + * {@link FilterCodec}. + * + * @see org.apache.lucene.codecs.lucene45 package documentation for file format details. + * @lucene.experimental + */ +// NOTE: if we make largish changes in a minor release, easier to just make Lucene46Codec or whatever +// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader +// (it writes a minor version, etc). +public class Lucene45Codec extends Codec { + private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat(); + private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat(); + private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Lucene45Codec.this.getPostingsFormatForField(field); + } + }; + + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return Lucene45Codec.this.getDocValuesFormatForField(field); + } + }; + + /** Sole constructor. */ + public Lucene45Codec() { + super("Lucene45"); + } + + @Override + public final StoredFieldsFormat storedFieldsFormat() { + return fieldsFormat; + } + + @Override + public final TermVectorsFormat termVectorsFormat() { + return vectorsFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return infosFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + /** Returns the postings format that should be used for writing + * new segments of field. + * + * The default implementation always returns "Lucene41" + */ + public PostingsFormat getPostingsFormatForField(String field) { + return defaultFormat; + } + + /** Returns the docvalues format that should be used for writing + * new segments of field. 
+ * + * The default implementation always returns "Lucene45" + */ + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); + private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene45"); + + private final NormsFormat normsFormat = new Lucene42NormsFormat(); + + @Override + public final NormsFormat normsFormat() { + return normsFormat; + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java similarity index 93% rename from lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java index b5124871237..942ee228045 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.diskdv; +package org.apache.lucene.codecs.lucene45; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -37,8 +37,8 @@ import org.apache.lucene.util.packed.BlockPackedWriter; import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts; -/** writer for {@link DiskDocValuesFormat} */ -public class DiskDocValuesConsumer extends DocValuesConsumer { +/** writer for {@link Lucene45DocValuesFormat} */ +public class Lucene45DocValuesConsumer extends DocValuesConsumer { static final int BLOCK_SIZE = 16384; static final int ADDRESS_INTERVAL = 16; @@ -60,15 +60,15 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { final IndexOutput data, meta; final int maxDoc; - public DiskDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { + public Lucene45DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { boolean success = false; try { String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.createOutput(dataName, state.context); - CodecUtil.writeHeader(data, dataCodec, DiskDocValuesFormat.VERSION_CURRENT); + CodecUtil.writeHeader(data, dataCodec, Lucene45DocValuesFormat.VERSION_CURRENT); String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); meta = state.directory.createOutput(metaName, state.context); - CodecUtil.writeHeader(meta, metaCodec, DiskDocValuesFormat.VERSION_CURRENT); + CodecUtil.writeHeader(meta, metaCodec, Lucene45DocValuesFormat.VERSION_CURRENT); maxDoc = state.segmentInfo.getDocCount(); success = true; } finally { @@ -140,7 +140,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { format = DELTA_COMPRESSED; } meta.writeVInt(field.number); - meta.writeByte(DiskDocValuesFormat.NUMERIC); + meta.writeByte(Lucene45DocValuesFormat.NUMERIC); meta.writeVInt(format); meta.writeVInt(PackedInts.VERSION_CURRENT); meta.writeLong(data.getFilePointer()); @@ -189,7 +189,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { public void addBinaryField(FieldInfo 
field, Iterable values) throws IOException { // write the byte[] data meta.writeVInt(field.number); - meta.writeByte(DiskDocValuesFormat.BINARY); + meta.writeByte(Lucene45DocValuesFormat.BINARY); int minLength = Integer.MAX_VALUE; int maxLength = Integer.MIN_VALUE; final long startFP = data.getFilePointer(); @@ -242,7 +242,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { } else { // header meta.writeVInt(field.number); - meta.writeByte(DiskDocValuesFormat.BINARY); + meta.writeByte(Lucene45DocValuesFormat.BINARY); meta.writeVInt(BINARY_PREFIX_COMPRESSED); // now write the bytes: sharing prefixes within a block final long startFP = data.getFilePointer(); @@ -315,7 +315,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { values = MissingOrdRemapper.insertEmptyValue(values); } meta.writeVInt(field.number); - meta.writeByte(DiskDocValuesFormat.SORTED); + meta.writeByte(Lucene45DocValuesFormat.SORTED); addTermsDict(field, values); addNumericField(field, docToOrd, false); } @@ -323,7 +323,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { @Override public void addSortedSetField(FieldInfo field, Iterable values, Iterable docToOrdCount, Iterable ords) throws IOException { meta.writeVInt(field.number); - meta.writeByte(DiskDocValuesFormat.SORTED_SET); + meta.writeByte(Lucene45DocValuesFormat.SORTED_SET); // write the ord -> byte[] as a binary field addTermsDict(field, values); // write the stream of ords as a numeric field @@ -332,7 +332,7 @@ public class DiskDocValuesConsumer extends DocValuesConsumer { // write the doc -> ord count as a absolute index to the stream meta.writeVInt(field.number); - meta.writeByte(DiskDocValuesFormat.NUMERIC); + meta.writeByte(Lucene45DocValuesFormat.NUMERIC); meta.writeVInt(DELTA_COMPRESSED); meta.writeVInt(PackedInts.VERSION_CURRENT); meta.writeLong(data.getFilePointer()); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java new file mode 100644 index 00000000000..68a44370b30 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java @@ -0,0 +1,167 @@ +package org.apache.lucene.codecs.lucene45; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.SmallFloat; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.packed.BlockPackedWriter; +import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; +import org.apache.lucene.util.packed.PackedInts; + +/** + * Lucene 4.5 DocValues format. + *

+ * <p>
+ * Encodes the four per-document value types (Numeric, Binary, Sorted, SortedSet) with these strategies:
+ * <p>
+ * {@link DocValuesType#NUMERIC NUMERIC}:
+ * <ul>
+ *   <li>Delta-compressed: per-document integers written in blocks of 16k. For each block
+ *       the minimum value in that block is encoded, and each entry is a delta from that
+ *       minimum value. Each block of deltas is compressed with bitpacking. For more
+ *       information, see {@link BlockPackedWriter}.</li>
+ *   <li>Table-compressed: when the number of unique values is very small (&lt; 256), and
+ *       when there are unused "gaps" in the range of values used (such as {@link SmallFloat}),
+ *       a lookup table is written instead. Each per-document entry is instead the ordinal
+ *       into this table, and those ordinals are compressed with bitpacking ({@link PackedInts}).</li>
+ *   <li>GCD-compressed: when all numbers share a common divisor, such as dates, the greatest
+ *       common divisor (GCD) is computed, and quotients are stored using delta-compressed numerics.</li>
+ * </ul>
+ * (A sketch of how a writer might choose among these strategies follows this list.)
+ *
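The three numeric strategies above are chosen from simple statistics over the incoming values. A rough sketch of that decision, in the spirit of the consumer code elsewhere in this patch but with our own names and without its exact space-saving checks:

  import java.math.BigInteger;
  import java.util.HashSet;
  import java.util.Set;

  // Pick a numeric strategy from the observed values (illustrative only).
  static String pickNumericStrategy(long[] values) {
    long minValue = Long.MAX_VALUE;
    Set<Long> uniqueValues = new HashSet<Long>();
    for (long v : values) {
      minValue = Math.min(minValue, v);
      if (uniqueValues.size() < 256) {
        uniqueValues.add(v); // stop tracking once the table would be too big
      }
    }
    long gcd = 0;
    for (long v : values) {
      // gcd of the deltas from the minimum; BigInteger sidesteps overflow
      gcd = BigInteger.valueOf(gcd).gcd(BigInteger.valueOf(v - minValue)).longValue();
    }
    if (uniqueValues.size() < 256) {
      return "TABLE_COMPRESSED"; // tiny value set: bitpacked ordinals into a table
    } else if (gcd > 1) {
      return "GCD_COMPRESSED";   // shared divisor: store (value - min) / gcd
    } else {
      return "DELTA_COMPRESSED"; // default: block-wise deltas, bitpacked
    }
  }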

+ * <p>
+ * {@link DocValuesType#BINARY BINARY}:
+ * <ul>
+ *   <li>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length.
+ *       Each document's value can be addressed directly with multiplication ({@code docID * length}).</li>
+ *   <li>Variable-width Binary: one large concatenated byte[] is written, along with end addresses
+ *       for each document. The addresses are written in blocks of 16k, with the current absolute
+ *       start for the block, and the average (expected) delta per entry. For each document the
+ *       deviation from the delta (actual - expected) is written.</li>
+ *   <li>Prefix-compressed Binary: nocommit</li>
+ * </ul>
+ * <p>
+ * {@link DocValuesType#SORTED SORTED}:
+ * <ul>
+ *   <li>Sorted: an FST mapping deduplicated terms to ordinals is written, along with the per-document
+ *       ordinals written using one of the numeric strategies above.</li>
+ * </ul>
+ * <p>
+ * {@link DocValuesType#SORTED_SET SORTED_SET}:
+ * <ul>
+ *   <li>SortedSet: an FST mapping deduplicated terms to ordinals is written, along with the per-document
+ *       ordinal list written using one of the binary strategies above.</li>
+ * </ul>
+ * (An addressing sketch for the fixed- and variable-width binary cases follows this section.)
+ *
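The fixed- versus variable-width addressing described above reduces to the arithmetic below; the names are ours, but the variable-width case mirrors the startAddress/endAddress computation visible in the producer code later in this patch:

  // Illustrative sketch of the two binary addressing schemes.
  final class BinaryAddressing {

    // Fixed-width: every value is exactly `length` bytes, so addressing is
    // pure multiplication from the start of the field's data.
    static long fixedStart(long dataOffset, int docID, int length) {
      return dataOffset + (long) docID * length;
    }

    // Variable-width: a monotonic `addresses` structure stores the end offset
    // of each document's bytes; the start is the previous document's end.
    static long[] variableRange(long dataOffset, int docID, long[] addresses) {
      long start = dataOffset + (docID == 0 ? 0 : addresses[docID - 1]);
      long end = dataOffset + addresses[docID];
      return new long[] { start, end }; // value length is end - start
    }
  }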

+ * <p>
+ * Files:
+ * <ol>
+ *   <li>.dvd: DocValues data</li>
+ *   <li>.dvm: DocValues metadata</li>
+ * </ol>
+ * <ol>
+ *   <li>
+ *     <p>The DocValues metadata or .dvm file.</p>
+ *     <p>For each DocValues field, this stores metadata, such as the offset into the
+ *        DocValues data (.dvd).</p>
+ *     <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry>NumFields</p>
+ *     <ul>
+ *       <li>Entry --> NumericEntry | BinaryEntry | SortedEntry</li>
+ *       <li>NumericEntry --> DataOffset,NumericCompressionType,PackedVersion</li>
+ *       <li>BinaryEntry --> DataOffset,DataLength,MinLength,MaxLength,PackedVersion?,BlockSize?</li>
+ *       <li>SortedEntry --> DataOffset,ValueCount</li>
+ *       <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> {@link DataOutput#writeVInt VInt}</li>
+ *       <li>DataOffset,DataLength --> {@link DataOutput#writeLong Int64}</li>
+ *       <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
+ *       <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ *     </ul>
+ *     <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
+ *        and an ordinary NumericEntry for the document-to-ord metadata.</p>
+ *     <p>SortedSet fields have two entries: a SortedEntry with the FST metadata,
+ *        and an ordinary BinaryEntry for the document-to-ord-list metadata.</p>
+ *     <p>A FieldNumber of -1 indicates the end of metadata.</p>
+ *     <p>EntryType is 0 (NumericEntry), 1 (BinaryEntry), or 2 (SortedEntry).</p>
+ *     <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd).</p>
+ *     <p>NumericCompressionType indicates how Numeric values will be compressed:</p>
+ *     <ul>
+ *       <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
+ *           from the minimum value within the block.</li>
+ *       <li>1 --> gcd-compressed. When all integers share a common divisor, only quotients are stored
+ *           using blocks of delta-encoded ints.</li>
+ *       <li>2 --> table-compressed. When the number of unique numeric values is small and it would save space,
+ *           a lookup table of unique values is written, followed by the ordinal for each document.</li>
+ *     </ul>
+ *     <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
+ *        If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
+ *        Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
+ *        is written for the addresses.</p>
+ *   </li>
+ *   <li>
+ *     <p>The DocValues data or .dvd file.</p>
+ *     <p>For each DocValues field, this stores the actual per-document data (the heavy lifting).</p>
+ *     <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData>NumFields</p>
+ *     <ul>
+ *       <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li>
+ *       <li>BinaryData --> {@link DataOutput#writeByte Byte}DataLength,Addresses</li>
+ *       <li>SortedData --> {@link FST FST<Int64>}</li>
+ *       <li>DeltaCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
+ *       <li>TableCompressedNumerics --> TableSize,{@link DataOutput#writeLong Int64}TableSize,{@link PackedInts PackedInts}</li>
+ *       <li>GCDCompressedNumerics --> MinValue,GCD,{@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
+ *       <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
+ *       <li>TableSize --> {@link DataOutput#writeVInt vInt}</li>
+ *       <li>MinValue --> {@link DataOutput#writeLong Int64}</li>
+ *       <li>GCD --> {@link DataOutput#writeLong Int64}</li>
+ *     </ul>
+ *     <p>SortedSet entries store the list of ordinals in their BinaryData as a
+ *        sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded
+ *        (see the sketch after this section).</p>
+ *   </li>
+ * </ol>
+ *
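To make the last point concrete, this is how an increasing ord list like the one the (nocommit-flagged) javadoc above describes can be delta-encoded as vLongs. The codec writes into its own .dvd stream; ByteArrayDataOutput is used here purely to keep the sketch self-contained:

  import java.io.IOException;
  import org.apache.lucene.store.ByteArrayDataOutput;

  // Delta-encode an increasing ord list as vLongs (illustrative only).
  static void writeOrds(ByteArrayDataOutput out, long[] increasingOrds) throws IOException {
    long previous = 0;
    for (long ord : increasingOrds) {
      // deltas are non-negative because the ords are increasing, so they
      // stay valid vLongs and small gaps encode in very few bytes
      out.writeVLong(ord - previous);
      previous = ord;
    }
  }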
    + * @lucene.experimental + */ +// nocommit: docs are incomplete +public final class Lucene45DocValuesFormat extends DocValuesFormat { + + public Lucene45DocValuesFormat() { + super("Lucene45"); + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new Lucene45DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return new Lucene45DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); + } + + public static final String DATA_CODEC = "Lucene45DocValuesData"; + public static final String DATA_EXTENSION = "dvd"; + public static final String META_CODEC = "Lucene45ValuesMetadata"; + public static final String META_EXTENSION = "dvm"; + public static final int VERSION_START = 0; + public static final int VERSION_CURRENT = VERSION_START; + public static final byte NUMERIC = 0; + public static final byte BINARY = 1; + public static final byte SORTED = 2; + public static final byte SORTED_SET = 3; +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java new file mode 100644 index 00000000000..b19a34e169c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java @@ -0,0 +1,755 @@ +package org.apache.lucene.codecs.lucene45; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED; + +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED; + +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.packed.BlockPackedReader; +import org.apache.lucene.util.packed.MonotonicBlockPackedReader; +import org.apache.lucene.util.packed.PackedInts; + +class Lucene45DocValuesProducer extends DocValuesProducer { + private final Map numerics; + private final Map binaries; + private final Map ords; + private final Map ordIndexes; + private final IndexInput data; + private final int maxDoc; + + // memory-resident structures + private final Map addressInstances = new HashMap(); + private final Map ordIndexInstances = new HashMap(); + + Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + // read in the entries from the metadata file. 
+ IndexInput in = state.directory.openInput(metaName, state.context); + this.maxDoc = state.segmentInfo.getDocCount(); + boolean success = false; + final int version; + try { + version = CodecUtil.checkHeader(in, metaCodec, + Lucene45DocValuesFormat.VERSION_CURRENT, + Lucene45DocValuesFormat.VERSION_CURRENT); + numerics = new HashMap(); + ords = new HashMap(); + ordIndexes = new HashMap(); + binaries = new HashMap(); + readFields(in, state.fieldInfos); + + success = true; + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + + success = false; + try { + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.openInput(dataName, state.context); + final int version2 = CodecUtil.checkHeader(data, dataCodec, + Lucene45DocValuesFormat.VERSION_CURRENT, + Lucene45DocValuesFormat.VERSION_CURRENT); + if (version != version2) { + throw new CorruptIndexException("Format versions mismatch"); + } + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this.data); + } + } + } + + private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + int fieldNumber = meta.readVInt(); + while (fieldNumber != -1) { + byte type = meta.readByte(); + if (type == Lucene45DocValuesFormat.NUMERIC) { + numerics.put(fieldNumber, readNumericEntry(meta)); + } else if (type == Lucene45DocValuesFormat.BINARY) { + BinaryEntry b = readBinaryEntry(meta); + binaries.put(fieldNumber, b); + } else if (type == Lucene45DocValuesFormat.SORTED) { + // sorted = binary + numeric + if (meta.readVInt() != fieldNumber) { + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + if (meta.readByte() != Lucene45DocValuesFormat.BINARY) { + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + BinaryEntry b = readBinaryEntry(meta); + binaries.put(fieldNumber, b); + + if (meta.readVInt() != fieldNumber) { + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { + throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + NumericEntry n = readNumericEntry(meta); + ords.put(fieldNumber, n); + } else if (type == Lucene45DocValuesFormat.SORTED_SET) { + // sortedset = binary + numeric + ordIndex + if (meta.readVInt() != fieldNumber) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + if (meta.readByte() != Lucene45DocValuesFormat.BINARY) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + BinaryEntry b = readBinaryEntry(meta); + binaries.put(fieldNumber, b); + + if (meta.readVInt() != fieldNumber) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + NumericEntry n1 = readNumericEntry(meta); + ords.put(fieldNumber, n1); + + if (meta.readVInt() != fieldNumber) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is 
corrupt (resource=" + meta + ")"); + } + if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { + throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); + } + NumericEntry n2 = readNumericEntry(meta); + ordIndexes.put(fieldNumber, n2); + } else { + throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta); + } + fieldNumber = meta.readVInt(); + } + } + + static NumericEntry readNumericEntry(IndexInput meta) throws IOException { + NumericEntry entry = new NumericEntry(); + entry.format = meta.readVInt(); + entry.packedIntsVersion = meta.readVInt(); + entry.offset = meta.readLong(); + entry.count = meta.readVLong(); + entry.blockSize = meta.readVInt(); + switch(entry.format) { + case GCD_COMPRESSED: + entry.minValue = meta.readLong(); + entry.gcd = meta.readLong(); + break; + case TABLE_COMPRESSED: + if (entry.count > Integer.MAX_VALUE) { + throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta); + } + final int uniqueValues = meta.readVInt(); + if (uniqueValues > 256) { + throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta); + } + entry.table = new long[uniqueValues]; + for (int i = 0; i < uniqueValues; ++i) { + entry.table[i] = meta.readLong(); + } + break; + case DELTA_COMPRESSED: + break; + default: + throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); + } + return entry; + } + + static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { + BinaryEntry entry = new BinaryEntry(); + entry.format = meta.readVInt(); + entry.minLength = meta.readVInt(); + entry.maxLength = meta.readVInt(); + entry.count = meta.readVLong(); + entry.offset = meta.readLong(); + switch(entry.format) { + case BINARY_FIXED_UNCOMPRESSED: + break; + case BINARY_PREFIX_COMPRESSED: + entry.addressInterval = meta.readVInt(); + entry.addressesOffset = meta.readLong(); + entry.packedIntsVersion = meta.readVInt(); + entry.blockSize = meta.readVInt(); + break; + case BINARY_VARIABLE_UNCOMPRESSED: + entry.addressesOffset = meta.readLong(); + entry.packedIntsVersion = meta.readVInt(); + entry.blockSize = meta.readVInt(); + break; + default: + throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); + } + return entry; + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + NumericEntry entry = numerics.get(field.number); + return getNumeric(entry); + } + + LongNumericDocValues getNumeric(NumericEntry entry) throws IOException { + final IndexInput data = this.data.clone(); + data.seek(entry.offset); + + switch (entry.format) { + case DELTA_COMPRESSED: + final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); + return new LongNumericDocValues() { + @Override + public long get(long id) { + return reader.get(id); + } + }; + case GCD_COMPRESSED: + final long min = entry.minValue; + final long mult = entry.gcd; + final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); + return new LongNumericDocValues() { + @Override + public long get(long id) { + return min + mult * quotientReader.get(id); + } + }; + case TABLE_COMPRESSED: + final long table[] = entry.table; + final int bitsRequired = PackedInts.bitsRequired(table.length - 1); + final PackedInts.Reader ords = 
PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired); + return new LongNumericDocValues() { + @Override + public long get(long id) { + return table[(int) ords.get((int) id)]; + } + }; + default: + throw new AssertionError(); + } + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + BinaryEntry bytes = binaries.get(field.number); + switch(bytes.format) { + case BINARY_FIXED_UNCOMPRESSED: + return getFixedBinary(field, bytes); + case BINARY_VARIABLE_UNCOMPRESSED: + return getVariableBinary(field, bytes); + case BINARY_PREFIX_COMPRESSED: + return getCompressedBinary(field, bytes); + default: + throw new AssertionError(); + } + } + + private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) { + final IndexInput data = this.data.clone(); + + return new LongBinaryDocValues() { + @Override + public void get(long id, BytesRef result) { + long address = bytes.offset + id * bytes.maxLength; + try { + data.seek(address); + // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) + // assume "they" own the bytes after calling this! + final byte[] buffer = new byte[bytes.maxLength]; + data.readBytes(buffer, 0, buffer.length); + result.bytes = buffer; + result.offset = 0; + result.length = buffer.length; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }; + } + + private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { + final IndexInput data = this.data.clone(); + + final MonotonicBlockPackedReader addresses; + synchronized (addressInstances) { + MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number); + if (addrInstance == null) { + data.seek(bytes.addressesOffset); + addrInstance = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, false); + addressInstances.put(field.number, addrInstance); + } + addresses = addrInstance; + } + + return new LongBinaryDocValues() { + @Override + public void get(long id, BytesRef result) { + long startAddress = bytes.offset + (id == 0 ? 0 : addresses.get(id-1)); + long endAddress = bytes.offset + addresses.get(id); + int length = (int) (endAddress - startAddress); + try { + data.seek(startAddress); + // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) + // assume "they" own the bytes after calling this! 
+ final byte[] buffer = new byte[length]; + data.readBytes(buffer, 0, buffer.length); + result.bytes = buffer; + result.offset = 0; + result.length = length; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }; + } + + private BinaryDocValues getCompressedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { + final IndexInput data = this.data.clone(); + final long interval = bytes.addressInterval; + + final MonotonicBlockPackedReader addresses; + synchronized (addressInstances) { + MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number); + if (addrInstance == null) { + data.seek(bytes.addressesOffset); + final long size; + if (bytes.count % interval == 0) { + size = bytes.count / interval; + } else { + size = 1L + bytes.count / interval; + } + addrInstance = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, size, false); + addressInstances.put(field.number, addrInstance); + } + addresses = addrInstance; + } + + return new CompressedBinaryDocValues(bytes, addresses, data); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + final int valueCount = (int) binaries.get(field.number).count; + final BinaryDocValues binary = getBinary(field); + NumericEntry entry = ords.get(field.number); + IndexInput data = this.data.clone(); + data.seek(entry.offset); + final BlockPackedReader ordinals = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); + + return new SortedDocValues() { + + @Override + public int getOrd(int docID) { + return (int) ordinals.get(docID); + } + + @Override + public void lookupOrd(int ord, BytesRef result) { + binary.get(ord, result); + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public int lookupTerm(BytesRef key) { + if (binary instanceof CompressedBinaryDocValues) { + return (int) ((CompressedBinaryDocValues)binary).lookupTerm(key); + } else { + return super.lookupTerm(key); + } + } + + @Override + public TermsEnum termsEnum() { + if (binary instanceof CompressedBinaryDocValues) { + return ((CompressedBinaryDocValues)binary).getTermsEnum(); + } else { + return super.termsEnum(); + } + } + }; + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + final long valueCount = binaries.get(field.number).count; + // we keep the byte[]s and list of ords on disk, these could be large + final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); + final LongNumericDocValues ordinals = getNumeric(ords.get(field.number)); + // but the addresses to the ord stream are in RAM + final MonotonicBlockPackedReader ordIndex; + synchronized (ordIndexInstances) { + MonotonicBlockPackedReader ordIndexInstance = ordIndexInstances.get(field.number); + if (ordIndexInstance == null) { + NumericEntry entry = ordIndexes.get(field.number); + IndexInput data = this.data.clone(); + data.seek(entry.offset); + ordIndexInstance = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false); + ordIndexInstances.put(field.number, ordIndexInstance); + } + ordIndex = ordIndexInstance; + } + + return new SortedSetDocValues() { + long offset; + long endOffset; + + @Override + public long nextOrd() { + if (offset == endOffset) { + return NO_MORE_ORDS; + } else { + long ord = ordinals.get(offset); + offset++; + return ord; + } + } + + @Override + public void setDocument(int docID) { + offset = (docID == 0 ? 
0 : ordIndex.get(docID-1)); + endOffset = ordIndex.get(docID); + } + + @Override + public void lookupOrd(long ord, BytesRef result) { + binary.get(ord, result); + } + + @Override + public long getValueCount() { + return valueCount; + } + + @Override + public long lookupTerm(BytesRef key) { + if (binary instanceof CompressedBinaryDocValues) { + return ((CompressedBinaryDocValues)binary).lookupTerm(key); + } else { + return super.lookupTerm(key); + } + } + + @Override + public TermsEnum termsEnum() { + if (binary instanceof CompressedBinaryDocValues) { + return ((CompressedBinaryDocValues)binary).getTermsEnum(); + } else { + return super.termsEnum(); + } + } + }; + } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + // nocommit: only use this if the field's entry has missing values (write that), + // otherwise return MatchAllBits + if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + } else { + return new Bits.MatchAllBits(maxDoc); + } + } + + @Override + public void close() throws IOException { + data.close(); + } + + static class NumericEntry { + long offset; + + int format; + int packedIntsVersion; + long count; + int blockSize; + + long minValue; + long gcd; + long table[]; + } + + static class BinaryEntry { + long offset; + + int format; + long count; + int minLength; + int maxLength; + long addressesOffset; + long addressInterval; + int packedIntsVersion; + int blockSize; + } + + // internally we compose complex dv (sorted/sortedset) from other ones + static abstract class LongNumericDocValues extends NumericDocValues { + @Override + public final long get(int docID) { + return get((long) docID); + } + + abstract long get(long id); + } + + static abstract class LongBinaryDocValues extends BinaryDocValues { + @Override + public final void get(int docID, BytesRef result) { + get((long)docID, result); + } + + abstract void get(long id, BytesRef Result); + } + + // in the compressed case, we add a few additional operations for + // more efficient reverse lookup and enumeration + static class CompressedBinaryDocValues extends LongBinaryDocValues { + final BinaryEntry bytes; + final long interval; + final long numValues; + final long numIndexValues; + final MonotonicBlockPackedReader addresses; + final IndexInput data; + final TermsEnum termsEnum; + + public CompressedBinaryDocValues(BinaryEntry bytes, MonotonicBlockPackedReader addresses, IndexInput data) throws IOException { + this.bytes = bytes; + this.interval = bytes.addressInterval; + this.addresses = addresses; + this.data = data; + this.numValues = bytes.count; + this.numIndexValues = addresses.size(); + this.termsEnum = getTermsEnum(data); + } + + @Override + public void get(long id, BytesRef result) { + try { + termsEnum.seekExact(id); + BytesRef term = termsEnum.term(); + result.bytes = term.bytes; + result.offset = term.offset; + result.length = term.length; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + long lookupTerm(BytesRef key) { + try { + SeekStatus status = termsEnum.seekCeil(key); + if (status == SeekStatus.END) { + return -numValues-1; + } else if (status == SeekStatus.FOUND) { + return termsEnum.ord(); + } else { + return -termsEnum.ord()-1; + } + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + } + + TermsEnum getTermsEnum() { + try { + return getTermsEnum(data.clone()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private 
TermsEnum getTermsEnum(final IndexInput input) throws IOException { + input.seek(bytes.offset); + + return new TermsEnum() { + private long currentOrd = -1; + // TODO: maxLength is negative when all terms are merged away... + private final BytesRef termBuffer = new BytesRef(bytes.maxLength < 0 ? 0 : bytes.maxLength); + private final BytesRef term = new BytesRef(); // TODO: paranoia? + + @Override + public BytesRef next() throws IOException { + if (doNext() == null) { + return null; + } else { + setTerm(); + return term; + } + } + + private BytesRef doNext() throws IOException { + if (++currentOrd >= numValues) { + return null; + } else { + int start = input.readVInt(); + int suffix = input.readVInt(); + input.readBytes(termBuffer.bytes, start, suffix); + termBuffer.length = start + suffix; + return termBuffer; + } + } + + @Override + public SeekStatus seekCeil(BytesRef text) throws IOException { + // binary-search just the index values to find the block, + // then scan within the block + long low = 0; + long high = numIndexValues-1; + + while (low <= high) { + long mid = (low + high) >>> 1; + doSeek(mid * interval); + int cmp = termBuffer.compareTo(text); + + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + // we got lucky, found an indexed term + setTerm(); + return SeekStatus.FOUND; + } + } + + if (numIndexValues == 0) { + return SeekStatus.END; + } + + // block before insertion point + long block = low-1; + doSeek(block < 0 ? -1 : block * interval); + + while (doNext() != null) { + int cmp = termBuffer.compareTo(text); + if (cmp == 0) { + setTerm(); + return SeekStatus.FOUND; + } else if (cmp > 0) { + setTerm(); + return SeekStatus.NOT_FOUND; + } + } + + return SeekStatus.END; + } + + @Override + public void seekExact(long ord) throws IOException { + doSeek(ord); + setTerm(); + } + + private void doSeek(long ord) throws IOException { + long block = ord / interval; + + if (ord >= currentOrd && block == currentOrd / interval) { + // seek within current block + } else { + // position before start of block + currentOrd = ord - ord % interval - 1; + input.seek(bytes.offset + addresses.get(block)); + } + + while (currentOrd < ord) { + doNext(); + } + } + + private void setTerm() { + // TODO: is there a cleaner way + term.bytes = new byte[termBuffer.length]; + term.offset = 0; + term.copyBytes(termBuffer); + } + + @Override + public BytesRef term() throws IOException { + return term; + } + + @Override + public long ord() throws IOException { + return currentOrd; + } + + @Override + public Comparator getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + + @Override + public int docFreq() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long totalTermFreq() throws IOException { + return -1; + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html new file mode 100644 index 00000000000..677c176a89b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html @@ -0,0 +1,396 @@ + + + + + + + +Lucene 4.5 file format. + +

    Apache Lucene - Index File Formats

Introduction

This document defines the index file formats used in this version of Lucene. If you are using a different version of Lucene, please consult the copy of docs/ that was distributed with the version you are using.

Apache Lucene is written in Java, but several efforts are underway to write versions of Lucene in other programming languages. If these versions are to remain compatible with Apache Lucene, then a language-independent definition of the Lucene index format is required. This document thus attempts to provide a complete and independent definition of the Apache Lucene file formats.

As Lucene evolves, this document should evolve. Versions of Lucene in different programming languages should endeavor to agree on file formats, and generate new versions of this document.

    Definitions

The fundamental concepts in Lucene are index, document, field and term.

An index contains a sequence of documents.

• A document is a sequence of fields.
• A field is a named sequence of terms.
• A term is a sequence of bytes.

The same sequence of bytes in two different fields is considered a different term. Thus terms are represented as a pair: the string naming the field, and the bytes within the field.
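To make the field-plus-bytes pairing concrete, here is a small illustrative sketch (not part of the file format itself) using the Term class:

import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;

public class TermPairExample {
  public static void main(String[] args) {
    // the same byte sequence under two different field names is two distinct terms
    Term a = new Term("title", new BytesRef("lucene"));
    Term b = new Term("body", new BytesRef("lucene"));
    System.out.println(a.equals(b)); // false: the field name is part of the term
  }
}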

    Inverted Indexing

The index stores statistics about terms in order to make term-based search more efficient. Lucene's index falls into the family of indexes known as an inverted index. This is because it can list, for a term, the documents that contain it. This is the inverse of the natural relationship, in which documents list terms.
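To make the inverted relationship concrete, the following is a minimal, self-contained sketch of an index that lists, for each term, the documents containing it. It is an illustration of the concept only, not Lucene's actual representation:

import java.util.*;

public class ToyInvertedIndex {
  // maps each term to the sorted set of document numbers that contain it
  private final Map<String, SortedSet<Integer>> postings = new HashMap<>();

  public void addDocument(int docNumber, List<String> terms) {
    for (String term : terms) {
      postings.computeIfAbsent(term, t -> new TreeSet<>()).add(docNumber);
    }
  }

  // the inverted lookup: for a term, which documents contain it?
  public SortedSet<Integer> documentsContaining(String term) {
    return postings.getOrDefault(term, Collections.emptySortedSet());
  }

  public static void main(String[] args) {
    ToyInvertedIndex index = new ToyInvertedIndex();
    index.addDocument(0, Arrays.asList("lucene", "index", "format"));
    index.addDocument(1, Arrays.asList("lucene", "search"));
    System.out.println(index.documentsContaining("lucene")); // [0, 1]
  }
}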

    Types of Fields

In Lucene, fields may be stored, in which case their text is stored in the index literally, in a non-inverted manner. Fields that are inverted are called indexed. A field may be both stored and indexed.

The text of a field may be tokenized into terms to be indexed, or the text of a field may be used literally as a term to be indexed. Most fields are tokenized, but sometimes it is useful for certain identifier fields to be indexed literally.

See the {@link org.apache.lucene.document.Field Field} java docs for more information on Fields.
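As an illustration of these combinations, the sketch below uses the sugar field types from the document package; the field names and values are invented for the example:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

public class FieldTypesExample {
  static Document exampleDocument() {
    Document doc = new Document();
    // tokenized and inverted (indexed), but not stored
    doc.add(new TextField("body", "some text that will be tokenized", Store.NO));
    // indexed literally as a single term, and also stored
    doc.add(new StringField("id", "doc-42", Store.YES));
    // stored only: returned with hits, but not searchable
    doc.add(new StoredField("price", 9.99));
    return doc;
  }
}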

    Segments

Lucene indexes may be composed of multiple sub-indexes, or segments. Each segment is a fully independent index, which could be searched separately. Indexes evolve by:

1. Creating new segments for newly added documents.
2. Merging existing segments.

Searches may involve multiple segments and/or multiple indexes, each index potentially composed of a set of segments.

    Document Numbers

Internally, Lucene refers to documents by an integer document number. The first document added to an index is numbered zero, and each subsequent document added gets a number one greater than the previous.

Note that a document's number may change, so caution should be taken when storing these numbers outside of Lucene. In particular, numbers may change in the following situations:

• The numbers stored in each segment are unique only within the segment, and must be converted before they can be used in a larger context. The standard technique is to allocate each segment a range of values, based on the range of numbers used in that segment. To convert a document number from a segment to an external value, the segment's base document number is added. To convert an external value back to a segment-specific value, the segment is identified by the range that the external value is in, and the segment's base value is subtracted. For example, two five-document segments might be combined, so that the first segment has a base value of zero, and the second of five. Document three from the second segment would have an external value of eight. (A minimal sketch of this conversion follows the list.)

• When documents are deleted, gaps are created in the numbering. These are eventually removed as the index evolves through merging. Deleted documents are dropped when segments are merged. A freshly-merged segment thus has no gaps in its numbering.
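Because the remapping rule is easy to get wrong, here is a minimal sketch of the conversion described in the first item above. The fixed two-segment layout is purely illustrative:

public class DocNumberConversion {
  // base document number of each segment; here, two five-document segments
  static final int[] SEGMENT_BASES = {0, 5};

  static int toExternal(int segment, int localDoc) {
    return SEGMENT_BASES[segment] + localDoc; // add the segment's base
  }

  static int toLocal(int externalDoc) {
    // identify the segment by the range the external value falls in,
    // then subtract that segment's base
    for (int i = SEGMENT_BASES.length - 1; i >= 0; i--) {
      if (externalDoc >= SEGMENT_BASES[i]) {
        return externalDoc - SEGMENT_BASES[i];
      }
    }
    throw new IllegalArgumentException("negative doc number: " + externalDoc);
  }

  public static void main(String[] args) {
    System.out.println(toExternal(1, 3)); // 8: document three of the second segment
    System.out.println(toLocal(8));       // 3, within the segment whose base is 5
  }
}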

    Index Structure Overview


    Each segment index maintains the following:

• {@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. This contains metadata about a segment, such as the number of documents and what files it uses.
• {@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Field names}. This contains the set of field names used in the index.
• {@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. This contains, for each document, a list of attribute-value pairs, where the attributes are field names. These are used to store auxiliary information about the document, such as its title, url, or an identifier to access a database. The set of stored fields is what is returned for each hit when searching. This is keyed by document number.
• {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. A dictionary containing all of the terms used in all of the indexed fields of all of the documents. The dictionary also contains the number of documents which contain the term, and pointers to the term's frequency and proximity data.
• {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. For each term in the dictionary, the numbers of all the documents that contain that term, and the frequency of the term in that document, unless frequencies are omitted (IndexOptions.DOCS_ONLY).
• {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. For each term in the dictionary, the positions that the term occurs in each document. Note that this will not exist if all fields in all documents omit position data.
• {@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}. For each field in each document, a value is stored that is multiplied into the score for hits on that field.
• {@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}. For each field in each document, the term vector (sometimes called document vector) may be stored. A term vector consists of term text and term frequency. To add Term Vectors to your index see the {@link org.apache.lucene.document.Field Field} constructors.
• {@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-document values}. Like stored values, these are also keyed by document number, but are generally intended to be loaded into main memory for fast access. Whereas stored values are generally intended for summary results from searches, per-document values are useful for things like scoring factors.
• {@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. An optional file indicating which documents are deleted.

    Details on each of these are provided in their linked pages.


    File Naming

All files belonging to a segment have the same name with varying extensions. The extensions correspond to the different file formats described below. When using the Compound File format (default in 1.4 and greater) these files (except for the Segment info file, the Lock file, and Deleted documents file) are collapsed into a single .cfs file (see below for details).

Typically, all segments in an index are stored in a single directory, although this is not required.

As of version 2.1 (lock-less commits), file names are never re-used (there is one exception, "segments.gen", see below). That is, when any file is saved to the Directory it is given a never before used filename. This is achieved using a simple generations approach. For example, the first segments file is segments_1, then segments_2, etc. The generation is a sequential long integer represented in alpha-numeric (base 36) form.
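For illustration, generation-stamped names like these can be derived with plain Java; the helper below is a sketch, not Lucene's internal API:

public class SegmentsFileNames {
  static String segmentsFileName(long generation) {
    // Character.MAX_RADIX is 36, so generations render as 1, 2, ..., 9, a, b, ..., z, 10, ...
    return "segments_" + Long.toString(generation, Character.MAX_RADIX);
  }

  public static void main(String[] args) {
    System.out.println(segmentsFileName(1));  // segments_1
    System.out.println(segmentsFileName(10)); // segments_a
    System.out.println(segmentsFileName(36)); // segments_10
  }
}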

    Summary of File Extensions

The following table summarizes the names and extensions of the files in Lucene:

Name | Extension | Brief Description
{@link org.apache.lucene.index.SegmentInfos Segments File} | segments.gen, segments_N | Stores information about a commit point
Lock File | write.lock | The write lock prevents multiple IndexWriters from writing to the same file.
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info} | .si | Stores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File} | .cfs, .cfe | An optional "virtual" file consisting of all the other index files for systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Fields} | .fnm | Stores information about the fields
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index} | .fdx | Contains pointers to field data
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data} | .fdt | The stored fields for documents
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary} | .tim | The term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index} | .tip | The index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies} | .doc | Contains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions} | .pos | Stores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads} | .pay | Stores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms} | .nvd, .nvm | Encodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-Document Values} | .dvd, .dvm | Encodes additional scoring factors or other per-document information
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index} | .tvx | Stores offset into the document data file
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents} | .tvd | Contains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields} | .tvf | The field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents} | .del | Info about what documents are deleted

    Lock File

The write lock, which is stored in the index directory by default, is named "write.lock". If the lock directory is different from the index directory then the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix derived from the full path to the index directory. When this file is present, a writer is currently modifying the index (adding or removing documents). This lock file ensures that only one writer is modifying the index at a time.

    History

Compatibility notes are provided in this document, describing how file formats have changed from prior versions:

• In version 2.1, the file format was changed to allow lock-less commits (i.e., no more commit lock). The change is fully backwards compatible: you can open a pre-2.1 index for searching or adding/deleting of docs. When the new segments file is saved (committed), it will be written in the new file format (meaning no specific "upgrade" process is needed). But note that once a commit has occurred, pre-2.1 Lucene will not be able to read the index.
• In version 2.3, the file format was changed to allow segments to share a single set of doc store (vectors & stored fields) files. This allows for faster indexing in certain cases. The change is fully backwards compatible (in the same way as the lock-less commits change in 2.1).
• In version 2.4, Strings are now written as a true UTF-8 byte sequence, not Java's modified UTF-8. See LUCENE-510 for details.
• In version 2.9, an optional opaque Map<String,String> CommitUserData may be passed to IndexWriter's commit methods (and later retrieved), which is recorded in the segments_N file. See LUCENE-1382 for details. Also, diagnostics were added to each segment written, recording details about why it was written (due to flush, merge; which OS/JRE was used; etc.). See issue LUCENE-1654 for details.
• In version 3.0, compressed fields are no longer written to the index (they can still be read, but on merge the new segment will write them uncompressed). See issue LUCENE-1960 for details.
• In version 3.1, segments record the code version that created them. See LUCENE-2720 for details. Additionally, segments track explicitly whether or not they have term vectors. See LUCENE-2811 for details.
• In version 3.2, numeric fields are written natively to the stored fields file; previously they were stored in text format only.
• In version 3.4, fields can omit position data while still indexing term frequencies.
• In version 4.0, the format of the inverted index became extensible via the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage ({@code DocValues}) was introduced. Normalization factors need no longer be a single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. Terms need not be unicode strings, they can be any byte sequence. Term offsets can optionally be indexed into the postings lists. Payloads can be stored in the term vectors.
• In version 4.1, the format of the postings list changed to use either FOR compression or variable-byte encoding, depending upon the frequency of the term. Terms appearing only once were changed to inline directly into the term dictionary. Stored fields are compressed by default.
• In version 4.2, term vectors are compressed by default. DocValues has a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining on multi-valued fields.
• In version 4.5, DocValues were extended to explicitly represent missing values (see the sketch after this list).
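As a sketch of what the 4.5 change enables, the snippet below distinguishes a document that has no value for a field from one whose value happens to be zero, using the getDocsWithField API this patch introduces; the field name "popularity" is invented:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;

public class MissingValuesExample {
  static Long valueOrNull(AtomicReader reader, int docID) throws IOException {
    NumericDocValues values = reader.getNumericDocValues("popularity");
    Bits docsWithField = reader.getDocsWithField("popularity");
    if (values == null || !docsWithField.get(docID)) {
      return null; // the document genuinely has no value for this field
    }
    return values.get(docID); // may legitimately be 0
  }
}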

    Limitations

Lucene uses a Java int to refer to document numbers, and the index file format uses an Int32 on-disk to store document numbers. This is a limitation of both the index file format and the current implementation. Eventually these should be replaced with either UInt64 values, or better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.
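Since VInt is central to that suggestion, here is a sketch of the variable-length scheme (seven data bits per byte, with the high bit marking continuation), written only from the description above:

import java.io.ByteArrayOutputStream;

public class VIntSketch {
  static byte[] encode(int value) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    while ((value & ~0x7F) != 0) {      // more than seven significant bits remain
      out.write((value & 0x7F) | 0x80); // emit the low seven bits, set the continuation bit
      value >>>= 7;
    }
    out.write(value);                   // final byte has its high bit clear
    return out.toByteArray();
  }

  public static void main(String[] args) {
    System.out.println(encode(127).length); // 1 byte
    System.out.println(encode(128).length); // 2 bytes
  }
}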
    + + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/package.html b/lucene/core/src/java/org/apache/lucene/codecs/package.html index f0f12b42b75..16f53b5cdde 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/package.html @@ -61,9 +61,13 @@ name of your codec. If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings formats for different fields, then you can register your custom postings format in the same way (in META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default - {@link org.apache.lucene.codecs.lucene42.Lucene42Codec} and override - {@link org.apache.lucene.codecs.lucene42.Lucene42Codec#getPostingsFormatForField(String)} to return your custom + {@link org.apache.lucene.codecs.lucene45.Lucene45Codec} and override + {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getPostingsFormatForField(String)} to return your custom postings format.

Similarly, if you just want to customise the {@link org.apache.lucene.codecs.DocValuesFormat} per-field, have a look at {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getDocValuesFormatForField(String)}.
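To show the kind of override that paragraph describes, here is a sketch modeled on the tests in this patch; the field name "popularity" and the SimpleText choice are illustrative only:

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene45.Lucene45Codec;

public class PerFieldDocValuesCodec extends Lucene45Codec {
  private final DocValuesFormat fast = DocValuesFormat.forName("Lucene45");
  private final DocValuesFormat debug = DocValuesFormat.forName("SimpleText");

  @Override
  public DocValuesFormat getDocValuesFormatForField(String field) {
    // route one field to the human-readable format, everything else to the default
    return "popularity".equals(field) ? debug : fast;
  }
}

The codec would then be installed via IndexWriterConfig#setCodec, as the tests in this patch do.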

    diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 5a702d29e39..d0f7bed509e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1410,7 +1410,7 @@ public class CheckIndex { private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField) { for (int i = 0; i < reader.maxDoc(); i++) { long value = ndv.get(i); - if (docsWithField.get(i) == false && value > 0) { + if (docsWithField.get(i) == false && value != 0) { throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i); } } diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 8ae12c55368..3398b343d49 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -16,3 +16,4 @@ org.apache.lucene.codecs.lucene40.Lucene40Codec org.apache.lucene.codecs.lucene41.Lucene41Codec org.apache.lucene.codecs.lucene42.Lucene42Codec +org.apache.lucene.codecs.lucene45.Lucene45Codec \ No newline at end of file diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index c0e599e3611..262f8a2bdd5 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -14,3 +14,4 @@ # limitations under the License. org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat +org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java index d973fb5c67c..df66d801833 100644 --- a/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java @@ -17,21 +17,27 @@ package org.apache.lucene; * limitations under the License. 
*/ -import org.apache.lucene.analysis.*; -import org.apache.lucene.codecs.*; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; -import org.apache.lucene.document.*; -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -import org.apache.lucene.store.*; -import org.apache.lucene.util.*; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.BaseDirectoryWrapper; +import org.apache.lucene.util.LuceneTestCase; + /* Intentionally outside of oal.index to verify fully external codecs work fine */ public class TestExternalCodecs extends LuceneTestCase { - private static final class CustomPerFieldCodec extends Lucene42Codec { + private static final class CustomPerFieldCodec extends Lucene45Codec { private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly"); private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestCheapBastardDocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java similarity index 79% rename from lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestCheapBastardDocValuesFormat.java rename to lucene/core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java index 4cd40c5cd4e..3f6171acd62 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestCheapBastardDocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.diskdv; +package org.apache.lucene.codecs.lucene45; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -18,15 +18,14 @@ package org.apache.lucene.codecs.diskdv; */ import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.cheapbastard.CheapBastardDocValuesFormat; import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase; import org.apache.lucene.util._TestUtil; /** - * Tests CheapBastardDocValuesFormat + * Tests Lucene45DocValuesFormat */ -public class TestCheapBastardDocValuesFormat extends BaseCompressingDocValuesFormatTestCase { - private final Codec codec = _TestUtil.alwaysDocValuesFormat(new CheapBastardDocValuesFormat()); +public class TestLucene45DocValuesFormat extends BaseCompressingDocValuesFormatTestCase { + private final Codec codec = _TestUtil.alwaysDocValuesFormat(new Lucene45DocValuesFormat()); @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java index 06795f005c6..43ac931c16f 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import 
org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -79,9 +79,9 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase { Directory directory = newDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); - final DocValuesFormat fast = DocValuesFormat.forName("Lucene42"); + final DocValuesFormat fast = DocValuesFormat.forName("Lucene45"); final DocValuesFormat slow = DocValuesFormat.forName("SimpleText"); - iwc.setCodec(new Lucene42Codec() { + iwc.setCodec(new Lucene45Codec() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { if ("dv1".equals(field)) { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java index 8d9f8838fba..4bfc8115ef6 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; @@ -200,7 +200,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } - public static class MockCodec extends Lucene42Codec { + public static class MockCodec extends Lucene45Codec { final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); final PostingsFormat mockSep = new MockSepPostingsFormat(); @@ -217,7 +217,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } } - public static class MockCodec2 extends Lucene42Codec { + public static class MockCodec2 extends Lucene45Codec { final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); @@ -268,7 +268,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } public void testSameCodecDifferentInstance() throws Exception { - Codec codec = new Lucene42Codec() { + Codec codec = new Lucene45Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if ("id".equals(field)) { @@ -284,7 +284,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } public void testSameCodecDifferentParams() throws Exception { - Codec codec = new Lucene42Codec() { + Codec codec = new Lucene45Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if ("id".equals(field)) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java index d0789dca5e3..d0ea942b959 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java +++ 
b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -1060,7 +1060,7 @@ public class TestAddIndexes extends LuceneTestCase { aux2.close(); } - private static final class CustomPerFieldCodec extends Lucene42Codec { + private static final class CustomPerFieldCodec extends Lucene45Codec { private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText"); private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep"); @@ -1111,7 +1111,7 @@ public class TestAddIndexes extends LuceneTestCase { private static final class UnRegisteredCodec extends FilterCodec { public UnRegisteredCodec() { - super("NotRegistered", new Lucene42Codec()); + super("NotRegistered", new Lucene45Codec()); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java index 9a9f75e47d6..784301783e1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexFileNames; @@ -41,7 +41,7 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase { public void test() throws Exception { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - conf.setCodec(new Lucene42Codec()); + conf.setCodec(new Lucene45Codec()); // riw should sometimes create docvalues fields, etc RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); Document doc = new Document(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java index 7c4f1a68a65..e222258bf0a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -49,7 +49,7 @@ public class TestDuelingCodecs extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); - // for now its SimpleText vs Lucene42(random postings format) + // for now its SimpleText vs Lucene45(random postings format) // as this gives the best overall coverage. 
when we have more // codecs we should probably pick 2 from Codec.availableCodecs() diff --git a/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java b/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java index 4f2c51edf00..1724cc26ec0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java @@ -25,8 +25,8 @@ import org.apache.lucene.codecs.Codec; // enough to test the basics via Codec public class TestNamedSPILoader extends LuceneTestCase { public void testLookup() { - Codec codec = Codec.forName("Lucene42"); - assertEquals("Lucene42", codec.getName()); + Codec codec = Codec.forName("Lucene45"); + assertEquals("Lucene45", codec.getName()); } // we want an exception if its not found. @@ -39,6 +39,6 @@ public class TestNamedSPILoader extends LuceneTestCase { public void testAvailableServices() { Set codecs = Codec.availableCodecs(); - assertTrue(codecs.contains("Lucene42")); + assertTrue(codecs.contains("Lucene45")); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java index 4987062afc0..f8984770a41 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java @@ -42,6 +42,7 @@ import org.apache.lucene.facet.params.FacetIndexingParams; * * @lucene.experimental */ +// nocommit public class Facet42Codec extends Lucene42Codec { private final Set facetFields; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java index e08899c5d01..79a78e0acfd 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java @@ -68,7 +68,9 @@ public class Facet42DocValuesConsumer extends DocValuesConsumer { long totBytes = 0; for (BytesRef v : values) { - totBytes += v.length; + if (v != null) { + totBytes += v.length; + } } if (totBytes > Integer.MAX_VALUE) { @@ -78,7 +80,9 @@ public class Facet42DocValuesConsumer extends DocValuesConsumer { out.writeVInt((int) totBytes); for (BytesRef v : values) { - out.writeBytes(v.bytes, v.offset, v.length); + if (v != null) { + out.writeBytes(v.bytes, v.offset, v.length); + } } PackedInts.Writer w = PackedInts.getWriter(out, maxDoc+1, PackedInts.bitsRequired(totBytes+1), acceptableOverheadRatio); @@ -86,7 +90,9 @@ public class Facet42DocValuesConsumer extends DocValuesConsumer { int address = 0; for(BytesRef v : values) { w.add(address); - address += v.length; + if (v != null) { + address += v.length; + } } w.add(address); w.finish(); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java index 8da0ad64224..73c5d497548 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java @@ -68,7 +68,7 @@ public class DoubleFieldSource extends FieldCacheSource { @Override public boolean exists(int doc) { - return valid.get(doc); + return arr.get(doc) != 0 || 
valid.get(doc); } @Override @@ -142,7 +142,7 @@ public class DoubleFieldSource extends FieldCacheSource { @Override public void fillValue(int doc) { mval.value = arr.get(doc); - mval.exists = valid.get(doc); + mval.exists = mval.value != 0 || valid.get(doc); } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java index da36cfc1360..c81526365c4 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java @@ -72,7 +72,7 @@ public class FloatFieldSource extends FieldCacheSource { @Override public boolean exists(int doc) { - return valid.get(doc); + return arr.get(doc) != 0 || valid.get(doc); } @Override @@ -88,7 +88,7 @@ public class FloatFieldSource extends FieldCacheSource { @Override public void fillValue(int doc) { mval.value = arr.get(doc); - mval.exists = valid.get(doc); + mval.exists = mval.value != 0 || valid.get(doc); } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IntFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IntFieldSource.java index 296432d6f9a..768d9d77e5a 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IntFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IntFieldSource.java @@ -95,7 +95,7 @@ public class IntFieldSource extends FieldCacheSource { @Override public boolean exists(int doc) { - return valid.get(doc); + return arr.get(doc) != 0 || valid.get(doc); } @Override @@ -150,7 +150,7 @@ public class IntFieldSource extends FieldCacheSource { @Override public void fillValue(int doc) { mval.value = arr.get(doc); - mval.exists = valid.get(doc); + mval.exists = mval.value != 0 || valid.get(doc); } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LongFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LongFieldSource.java index 597efe89e97..d1718313d8b 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LongFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LongFieldSource.java @@ -81,7 +81,7 @@ public class LongFieldSource extends FieldCacheSource { @Override public boolean exists(int doc) { - return valid.get(doc); + return arr.get(doc) != 0 || valid.get(doc); } @Override @@ -141,7 +141,7 @@ public class LongFieldSource extends FieldCacheSource { @Override public void fillValue(int doc) { mval.value = arr.get(doc); - mval.exists = valid.get(doc); + mval.exists = mval.value != 0 || valid.get(doc); } }; } diff --git a/lucene/site/xsl/index.xsl b/lucene/site/xsl/index.xsl index 9a7235b395b..842f7be3d9d 100644 --- a/lucene/site/xsl/index.xsl +++ b/lucene/site/xsl/index.xsl @@ -75,7 +75,7 @@
  • System Requirements: Minimum and supported Java versions.
  • Migration Guide: What changed in Lucene 4; how to migrate code from Lucene 3.x.
  • JRE Version Migration: Information about upgrading between major JRE versions.
  • -
  • File Formats: Guide to the supported index format used by Lucene. This can be customized by using an alternate codec.
  • +
  • File Formats: Guide to the supported index format used by Lucene. This can be customized by using an alternate codec.
  • Search and Scoring in Lucene: Introduction to how Lucene scores documents.
  • Classic Scoring Formula: Formula of Lucene's classic Vector Space implementation. (look here for other models)
  • Classic QueryParser Syntax: Overview of the Classic QueryParser's syntax and features.
  • diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxSimilarityValueSource.java b/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxSimilarityValueSource.java index f501f150ddb..2a3b2a7c539 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxSimilarityValueSource.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxSimilarityValueSource.java @@ -78,10 +78,12 @@ public class BBoxSimilarityValueSource extends ValueSource { @Override public float floatVal(int doc) { + double minXVal = minX.get(doc); + double maxXVal = maxX.get(doc); // make sure it has minX and area - if (validMinX.get(doc) && validMaxX.get(doc)) { + if ((minXVal != 0 || validMinX.get(doc)) && (maxXVal != 0 || validMaxX.get(doc))) { rect.reset( - minX.get(doc), maxX.get(doc), + minXVal, maxXVal, minY.get(doc), maxY.get(doc)); return (float) similarity.score(rect, null); } else { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 8c8ad170c2c..60f68ead5ec 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -34,7 +34,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -161,7 +161,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { * codec to use. */ protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer) { IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer); - iwc.setCodec(new Lucene42Codec()); + iwc.setCodec(new Lucene45Codec()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); return iwc; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java index 19cad74151f..9591997229d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java @@ -23,10 +23,10 @@ import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; /** - * Acts like {@link Lucene42Codec} but with additional asserts. + * Acts like {@link Lucene45Codec} but with additional asserts. 
*/ public final class AssertingCodec extends FilterCodec { @@ -37,7 +37,7 @@ public final class AssertingCodec extends FilterCodec { private final NormsFormat norms = new AssertingNormsFormat(); public AssertingCodec() { - super("Asserting", new Lucene42Codec()); + super("Asserting", new Lucene45Codec()); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java index a59e4c58c06..298d7aaf011 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java @@ -24,7 +24,7 @@ import java.util.NoSuchElementException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat; import org.apache.lucene.index.AssertingAtomicReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfo; @@ -39,10 +39,10 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.OpenBitSet; /** - * Just like {@link Lucene42DocValuesFormat} but with additional asserts. + * Just like {@link Lucene45DocValuesFormat} but with additional asserts. */ public class AssertingDocValuesFormat extends DocValuesFormat { - private final DocValuesFormat in = new Lucene42DocValuesFormat(); + private final DocValuesFormat in = new Lucene45DocValuesFormat(); public AssertingDocValuesFormat() { super("Asserting"); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java index 8b64401b452..5579af6245f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java @@ -28,6 +28,7 @@ import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +// nocommit /** * Just like {@link Lucene42NormsFormat} but with additional asserts. 
*/ diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java index 644f04becea..15310e09bc4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java @@ -23,10 +23,12 @@ import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; +import org.apache.lucene.codecs.diskdv.DiskNormsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; /** Codec that tries to use as little ram as possible because he spent all his money on beer */ // TODO: better name :) @@ -39,11 +41,11 @@ public class CheapBastardCodec extends FilterCodec { private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat(); private final TermVectorsFormat termVectors = new Lucene40TermVectorsFormat(); // these go to disk for all docvalues/norms datastructures - private final DocValuesFormat docValues = new CheapBastardDocValuesFormat(); - private final NormsFormat norms = new CheapBastardNormsFormat(); + private final DocValuesFormat docValues = new DiskDocValuesFormat(); + private final NormsFormat norms = new DiskNormsFormat(); public CheapBastardCodec() { - super("CheapBastard", new Lucene42Codec()); + super("CheapBastard", new Lucene45Codec()); } public PostingsFormat postingsFormat() { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesFormat.java deleted file mode 100644 index 07f152cd6ee..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesFormat.java +++ /dev/null @@ -1,74 +0,0 @@ -package org.apache.lucene.codecs.cheapbastard; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer; -import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.BytesRef; - -/** - * DocValues format that keeps everything on disk. - *

- * <p> - * Internally there are only 2 field types: - * <ul> - *    <li>BINARY: a big byte[]. - *    <li>NUMERIC: packed ints - * </ul> - * SORTED is encoded as BINARY + NUMERIC - * <p> - * NOTE: Don't use this format in production (its not very efficient). - * Most likely you would want some parts in RAM, other parts on disk. - * <p>
    - * @lucene.experimental - */ -public final class CheapBastardDocValuesFormat extends DocValuesFormat { - - public CheapBastardDocValuesFormat() { - super("CheapBastard"); - } - - @Override - public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - return new DiskDocValuesConsumer(state, DiskDocValuesFormat.DATA_CODEC, - DiskDocValuesFormat.DATA_EXTENSION, - DiskDocValuesFormat.META_CODEC, - DiskDocValuesFormat.META_EXTENSION) { - // don't ever write an index, we dont want to use RAM :) - @Override - protected void addTermsDict(FieldInfo field, Iterable values) throws IOException { - addBinaryField(field, values); - } - }; - } - - @Override - public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new CheapBastardDocValuesProducer(state, DiskDocValuesFormat.DATA_CODEC, - DiskDocValuesFormat.DATA_EXTENSION, - DiskDocValuesFormat.META_CODEC, - DiskDocValuesFormat.META_EXTENSION); - } -} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java deleted file mode 100644 index f6098dc1f21..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java +++ /dev/null @@ -1,444 +0,0 @@ -package org.apache.lucene.codecs.cheapbastard; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
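// (Editorial sketch, not part of the patch.) As the readFields comments below spell out,
// this producer composes the complex doc values types from two primitives: SORTED is a
// BINARY (the sorted term bytes) plus a NUMERIC (per-document ords), and SORTED_SET adds
// a monotonic "ordIndex" NUMERIC recording where each document's run of ords ends.
// Worked ordIndex example (values made up for illustration):
//   doc0 has ords {4,7}, doc1 has none, doc2 has {1,3,9}  ->  ordIndex = [2, 2, 5]
//   offset    = docID == 0 ? 0 : ordIndex.get(docID - 1);   // doc2 -> 2
//   endOffset = ordIndex.get(docID);                        // doc2 -> 5
// nextOrd() then returns ordinals.get(offset) .. ordinals.get(endOffset - 1), i.e. {1, 3, 9}.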
- */ - -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.TABLE_COMPRESSED; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.DocValuesProducer.SortedSetDocsWithField; -import org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer; -import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.packed.BlockPackedReader; -import org.apache.lucene.util.packed.MonotonicBlockPackedReader; -import org.apache.lucene.util.packed.PackedInts; - -class CheapBastardDocValuesProducer extends DocValuesProducer { - private final Map numerics; - private final Map ords; - private final Map ordIndexes; - private final Map binaries; - private final IndexInput data; - private final int maxDoc; - - CheapBastardDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { - String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); - this.maxDoc = state.segmentInfo.getDocCount(); - // read in the entries from the metadata file. 
- IndexInput in = state.directory.openInput(metaName, state.context); - boolean success = false; - final int version; - try { - version = CodecUtil.checkHeader(in, metaCodec, - DiskDocValuesFormat.VERSION_CURRENT, - DiskDocValuesFormat.VERSION_CURRENT); - numerics = new HashMap(); - ords = new HashMap(); - ordIndexes = new HashMap(); - binaries = new HashMap(); - readFields(in); - - success = true; - } finally { - if (success) { - IOUtils.close(in); - } else { - IOUtils.closeWhileHandlingException(in); - } - } - - success = false; - try { - String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); - data = state.directory.openInput(dataName, state.context); - final int version2 = CodecUtil.checkHeader(data, dataCodec, - DiskDocValuesFormat.VERSION_CURRENT, - DiskDocValuesFormat.VERSION_CURRENT); - if (version != version2) { - throw new CorruptIndexException("Versions mismatch"); - } - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(this.data); - } - } - - } - - private void readFields(IndexInput meta) throws IOException { - int fieldNumber = meta.readVInt(); - while (fieldNumber != -1) { - byte type = meta.readByte(); - if (type == DiskDocValuesFormat.NUMERIC) { - numerics.put(fieldNumber, readNumericEntry(meta)); - } else if (type == DiskDocValuesFormat.BINARY) { - BinaryEntry b = readBinaryEntry(meta); - binaries.put(fieldNumber, b); - } else if (type == DiskDocValuesFormat.SORTED) { - // sorted = binary + numeric - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != DiskDocValuesFormat.BINARY) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - BinaryEntry b = readBinaryEntry(meta); - binaries.put(fieldNumber, b); - - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - NumericEntry n = readNumericEntry(meta); - ords.put(fieldNumber, n); - } else if (type == DiskDocValuesFormat.SORTED_SET) { - // sortedset = binary + numeric + ordIndex - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != DiskDocValuesFormat.BINARY) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - BinaryEntry b = readBinaryEntry(meta); - binaries.put(fieldNumber, b); - - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - NumericEntry n1 = readNumericEntry(meta); - ords.put(fieldNumber, n1); - - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - NumericEntry n2 = readNumericEntry(meta); - ordIndexes.put(fieldNumber, n2); - } else { - throw new CorruptIndexException("invalid type: " + type + ", 
resource=" + meta); - } - fieldNumber = meta.readVInt(); - } - } - - static NumericEntry readNumericEntry(IndexInput meta) throws IOException { - NumericEntry entry = new NumericEntry(); - entry.format = meta.readVInt(); - entry.packedIntsVersion = meta.readVInt(); - entry.offset = meta.readLong(); - entry.count = meta.readVLong(); - entry.blockSize = meta.readVInt(); - switch(entry.format) { - case GCD_COMPRESSED: - entry.minValue = meta.readLong(); - entry.gcd = meta.readLong(); - break; - case TABLE_COMPRESSED: - if (entry.count > Integer.MAX_VALUE) { - throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta); - } - final int uniqueValues = meta.readVInt(); - if (uniqueValues > 256) { - throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta); - } - entry.table = new long[uniqueValues]; - for (int i = 0; i < uniqueValues; ++i) { - entry.table[i] = meta.readLong(); - } - break; - case DELTA_COMPRESSED: - break; - default: - throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); - } - return entry; - } - - static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { - BinaryEntry entry = new BinaryEntry(); - int format = meta.readVInt(); - if (format != DiskDocValuesConsumer.BINARY_FIXED_UNCOMPRESSED && format != DiskDocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED) { - throw new CorruptIndexException("Unexpected format for binary entry: " + format + ", input=" + meta); - } - entry.minLength = meta.readVInt(); - entry.maxLength = meta.readVInt(); - entry.count = meta.readVLong(); - entry.offset = meta.readLong(); - if (entry.minLength != entry.maxLength) { - entry.addressesOffset = meta.readLong(); - entry.packedIntsVersion = meta.readVInt(); - entry.blockSize = meta.readVInt(); - } - return entry; - } - - @Override - public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.number); - return getNumeric(field, entry); - } - - private LongNumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException { - final IndexInput data = this.data.clone(); - data.seek(entry.offset); - - switch (entry.format) { - case DELTA_COMPRESSED: - final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - return new LongNumericDocValues() { - @Override - public long get(long id) { - return reader.get(id); - } - }; - case GCD_COMPRESSED: - final long min = entry.minValue; - final long mult = entry.gcd; - final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - return new LongNumericDocValues() { - @Override - public long get(long id) { - return min + mult * quotientReader.get(id); - } - }; - case TABLE_COMPRESSED: - final long[] table = entry.table; - final int bitsRequired = PackedInts.bitsRequired(table.length - 1); - final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired); - return new LongNumericDocValues() { - @Override - long get(long id) { - return table[(int) ords.get((int) id)]; - } - }; - default: - throw new AssertionError(); - } - } - - @Override - public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry bytes = binaries.get(field.number); - if (bytes.minLength == bytes.maxLength) { - return 
getFixedBinary(field, bytes); - } else { - return getVariableBinary(field, bytes); - } - } - - private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) { - final IndexInput data = this.data.clone(); - - return new LongBinaryDocValues() { - @Override - public void get(long id, BytesRef result) { - long address = bytes.offset + id * bytes.maxLength; - try { - data.seek(address); - // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) - // assume "they" own the bytes after calling this! - final byte[] buffer = new byte[bytes.maxLength]; - data.readBytes(buffer, 0, buffer.length); - result.bytes = buffer; - result.offset = 0; - result.length = buffer.length; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - }; - } - - private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { - final IndexInput data = this.data.clone(); - data.seek(bytes.addressesOffset); - - final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true); - return new LongBinaryDocValues() { - @Override - public void get(long id, BytesRef result) { - long startAddress = bytes.offset + (id == 0 ? 0 : + addresses.get(id-1)); - long endAddress = bytes.offset + addresses.get(id); - int length = (int) (endAddress - startAddress); - try { - data.seek(startAddress); - // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) - // assume "they" own the bytes after calling this! - final byte[] buffer = new byte[length]; - data.readBytes(buffer, 0, buffer.length); - result.bytes = buffer; - result.offset = 0; - result.length = length; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - }; - } - - @Override - public SortedDocValues getSorted(FieldInfo field) throws IOException { - final int valueCount = (int) binaries.get(field.number).count; - final BinaryDocValues binary = getBinary(field); - final NumericDocValues ordinals = getNumeric(field, ords.get(field.number)); - return new SortedDocValues() { - - @Override - public int getOrd(int docID) { - return (int) ordinals.get(docID); - } - - @Override - public void lookupOrd(int ord, BytesRef result) { - binary.get(ord, result); - } - - @Override - public int getValueCount() { - return valueCount; - } - }; - } - - @Override - public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - final long valueCount = binaries.get(field.number).count; - final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); - final LongNumericDocValues ordinals = getNumeric(field, ords.get(field.number)); - NumericEntry entry = ordIndexes.get(field.number); - IndexInput data = this.data.clone(); - data.seek(entry.offset); - final MonotonicBlockPackedReader ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - - return new SortedSetDocValues() { - long offset; - long endOffset; - - @Override - public long nextOrd() { - if (offset == endOffset) { - return NO_MORE_ORDS; - } else { - long ord = ordinals.get(offset); - offset++; - return ord; - } - } - - @Override - public void setDocument(int docID) { - offset = (docID == 0 ? 
0 : ordIndex.get(docID-1)); - endOffset = ordIndex.get(docID); - } - - @Override - public void lookupOrd(long ord, BytesRef result) { - binary.get(ord, result); - } - - @Override - public long getValueCount() { - return valueCount; - } - }; - } - - @Override - public Bits getDocsWithField(FieldInfo field) throws IOException { - if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { - return new SortedSetDocsWithField(getSortedSet(field), maxDoc); - } else { - return new Bits.MatchAllBits(maxDoc); - } - } - - @Override - public void close() throws IOException { - data.close(); - } - - static class NumericEntry { - long offset; - - int format; - int packedIntsVersion; - long count; - int blockSize; - - long minValue; - long gcd; - long table[]; - } - - static class BinaryEntry { - long offset; - - long count; - int minLength; - int maxLength; - long addressesOffset; - int packedIntsVersion; - int blockSize; - } - - // internally we compose complex dv (sorted/sortedset) from other ones - static abstract class LongNumericDocValues extends NumericDocValues { - @Override - public final long get(int docID) { - return get((long) docID); - } - - abstract long get(long id); - } - - static abstract class LongBinaryDocValues extends BinaryDocValues { - @Override - public final void get(int docID, BytesRef result) { - get((long)docID, result); - } - - abstract void get(long id, BytesRef Result); - } -} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java index 5fa95c4c9e0..bcfd9361e37 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java @@ -23,13 +23,13 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import com.carrotsearch.randomizedtesting.generators.RandomInts; /** * A codec that uses {@link CompressingStoredFieldsFormat} for its stored - * fields and delegates to {@link Lucene42Codec} for everything else. + * fields and delegates to {@link Lucene45Codec} for everything else. 
*/ public abstract class CompressingCodec extends FilterCodec { @@ -73,7 +73,7 @@ public abstract class CompressingCodec extends FilterCodec { * Creates a compressing codec with a given segment suffix */ public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) { - super(name, new Lucene42Codec()); + super(name, new Lucene45Codec()); this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize); this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java index 24f41ab2995..8b2ed06e9e3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java @@ -2,8 +2,8 @@ package org.apache.lucene.codecs.compressing; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; -import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; +import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; // nocommit +import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; // nocommit import org.apache.lucene.util.packed.PackedInts; /* diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java index 7c6ba48f91d..5b6385e0d36 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java @@ -6,6 +6,7 @@ import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; import org.apache.lucene.util.packed.PackedInts; +// nocommit /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java index 2f1fc293592..6b4b1091b97 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java @@ -21,6 +21,7 @@ import org.apache.lucene.util.packed.PackedInts; * limitations under the License. 
*/ +// nocommit /** CompressionCodec that uses {@link CompressionMode#HIGH_COMPRESSION} */ public class HighCompressionCompressingCodec extends CompressingCodec { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java index 430948704a2..c25a0f3c764 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.compressing.CompressingCodec; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; @@ -502,7 +502,7 @@ public abstract class BaseStoredFieldsFormatTestCase extends LuceneTestCase { // get another codec, other than the default: so we are merging segments across different codecs final Codec otherCodec; if ("SimpleText".equals(Codec.getDefault().getName())) { - otherCodec = new Lucene42Codec(); + otherCodec = new Lucene45Codec(); } else { otherCodec = new SimpleTextCodec(); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index 9ecc5122f33..239be0f34c7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -35,10 +35,9 @@ import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval; import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; -import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat; import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings; -import org.apache.lucene.codecs.cheapbastard.CheapBastardDocValuesFormat; import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; import org.apache.lucene.codecs.memory.DirectPostingsFormat; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; @@ -62,7 +61,7 @@ import org.apache.lucene.util._TestUtil; * documents in different orders and the test will still be deterministic * and reproducable. 
*/ -public class RandomCodec extends Lucene42Codec { +public class RandomCodec extends Lucene45Codec { /** Shuffled list of postings formats to use for new mappings */ private List formats = new ArrayList(); @@ -148,11 +147,10 @@ public class RandomCodec extends Lucene42Codec { new MemoryPostingsFormat(false, random.nextFloat())); addDocValues(avoidCodecs, - new Lucene42DocValuesFormat(), + new Lucene45DocValuesFormat(), new DiskDocValuesFormat(), new SimpleTextDocValuesFormat(), - new AssertingDocValuesFormat(), - new CheapBastardDocValuesFormat()); + new AssertingDocValuesFormat()); Collections.shuffle(formats, random); Collections.shuffle(dvFormats, random); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java index ebe0426a18c..1ab8c818b7f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java @@ -40,6 +40,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat; import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; import org.apache.lucene.search.RandomSimilarityProvider; @@ -146,6 +147,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { savedCodec = Codec.getDefault(); int randomVal = random.nextInt(10); + // nocommit: 4.2 impersonator if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && "random".equals(TEST_DOCVALUESFORMAT) && @@ -182,7 +184,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT); } - codec = new Lucene42Codec() { + codec = new Lucene45Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index baf09e121a3..d79c948ed7e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -44,7 +44,7 @@ import java.util.zip.ZipFile; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.document.BinaryDocValuesField; @@ -703,7 +703,7 @@ public class _TestUtil { if (LuceneTestCase.VERBOSE) { System.out.println("forcing postings format to:" + format); } - return new Lucene42Codec() { + return new Lucene45Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; @@ -721,7 +721,7 @@ public class _TestUtil { if (LuceneTestCase.VERBOSE) { System.out.println("forcing docvalues format to:" + format); } - return new Lucene42Codec() { + return new Lucene45Codec() { 
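// (Editorial note, not part of the patch.) The anonymous subclass opened above pins one
// format for every field, regardless of name. Hypothetical test usage, assuming the
// enclosing helpers are _TestUtil.alwaysPostingsFormat(...) and alwaysDocValuesFormat(...)
// (the method names are not visible in these hunks):
//   IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
//   iwc.setCodec(_TestUtil.alwaysDocValuesFormat(new Lucene45DocValuesFormat()));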
@Override public DocValuesFormat getDocValuesFormatForField(String field) { return format; @@ -757,9 +757,12 @@ public class _TestUtil { } } + // nocommit: remove this, push this test to Lucene40/Lucene42 codec tests public static boolean fieldSupportsHugeBinaryDocValues(String field) { String dvFormat = getDocValuesFormat(field); - return dvFormat.equals("CheapBastard") || + System.out.println(dvFormat); + return dvFormat.equals("Lucene45") || + dvFormat.equals("Asserting") || dvFormat.equals("Disk") || dvFormat.equals("SimpleText"); } diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index 2eeda2459c7..d1798334486 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -14,4 +14,3 @@ # limitations under the License. org.apache.lucene.codecs.asserting.AssertingDocValuesFormat -org.apache.lucene.codecs.cheapbastard.CheapBastardDocValuesFormat \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java index 52d48280945..49bca112fc2 100644 --- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java +++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java @@ -3,7 +3,7 @@ package org.apache.solr.core; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.SchemaField; import org.apache.solr.util.plugin.SolrCoreAware; @@ -51,7 +51,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware { @Override public void init(NamedList args) { super.init(args); - codec = new Lucene42Codec() { + codec = new Lucene45Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { final SchemaField fieldOrNull = core.getLatestSchema().getFieldOrNull(field); diff --git a/solr/core/src/java/org/apache/solr/request/NumericFacets.java b/solr/core/src/java/org/apache/solr/request/NumericFacets.java index 96796c996be..e16e6358ca4 100644 --- a/solr/core/src/java/org/apache/solr/request/NumericFacets.java +++ b/solr/core/src/java/org/apache/solr/request/NumericFacets.java @@ -190,8 +190,9 @@ final class NumericFacets { } docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName); } - if (docsWithField.get(doc - ctx.docBase)) { - hashTable.add(doc, longs.get(doc - ctx.docBase), 1); + long v = longs.get(doc - ctx.docBase); + if (v != 0 || docsWithField.get(doc - ctx.docBase)) { + hashTable.add(doc, v, 1); } else { ++missingCount; } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml index 15074809892..73593829cf9 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml @@ -22,7 +22,7 @@ - + diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java index 0f06d44e2fa..c970652921b 100644 --- 
a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java +++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java @@ -55,10 +55,10 @@ public class TestCodecSupport extends SolrTestCaseJ4 { PerFieldDocValuesFormat format = (PerFieldDocValuesFormat) codec.docValuesFormat(); assertEquals("Disk", format.getDocValuesFormatForField(schemaField.getName()).getName()); schemaField = fields.get("string_memory_f"); - assertEquals("Lucene42", + assertEquals("Lucene45", format.getDocValuesFormatForField(schemaField.getName()).getName()); schemaField = fields.get("string_f"); - assertEquals("Lucene42", + assertEquals("Lucene45", format.getDocValuesFormatForField(schemaField.getName()).getName()); } @@ -80,8 +80,8 @@ public class TestCodecSupport extends SolrTestCaseJ4 { assertEquals("Disk", format.getDocValuesFormatForField("foo_disk").getName()); assertEquals("Disk", format.getDocValuesFormatForField("bar_disk").getName()); - assertEquals("Lucene42", format.getDocValuesFormatForField("foo_memory").getName()); - assertEquals("Lucene42", format.getDocValuesFormatForField("bar_memory").getName()); + assertEquals("Lucene45", format.getDocValuesFormatForField("foo_memory").getName()); + assertEquals("Lucene45", format.getDocValuesFormatForField("bar_memory").getName()); } public void testUnknownField() { From 642ac287a24236efc04f109b305df569ebc9cdfc Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 19 Aug 2013 13:49:03 +0000 Subject: [PATCH 04/16] make 4.2 read only / setup impersonator git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515416 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene42/Lucene42Codec.java | 15 +- .../lucene42/Lucene42DocValuesFormat.java | 15 +- .../lucene42/Lucene42DocValuesProducer.java | 37 ++-- .../lucene42/Lucene42NormsConsumer.java | 209 ++++++++++++++++++ .../codecs/lucene42/Lucene42NormsFormat.java | 4 +- .../lucene42/TestLucene42DocValuesFormat.java | 2 +- .../index/TestBackwardsCompatibility.java | 2 +- .../Facet45Codec.java} | 18 +- .../lucene/facet/codecs/facet45/package.html | 22 ++ .../services/org.apache.lucene.codecs.Codec | 2 +- .../apache/lucene/facet/FacetTestCase.java | 4 +- .../lucene/facet/search/TestDemoFacets.java | 4 +- .../compressing/FastCompressingCodec.java | 9 +- .../FastDecompressionCompressingCodec.java | 8 - .../HighCompressionCompressingCodec.java | 1 - .../lucene42/Lucene42DocValuesConsumer.java | 25 +-- .../codecs/lucene42/Lucene42RWCodec.java | 39 ++++ .../lucene42/Lucene42RWDocValuesFormat.java | 35 +++ .../lucene/codecs/lucene42/package.html | 25 +++ .../index/BaseDocValuesFormatTestCase.java | 1 + .../util/TestRuleSetupAndRestoreClassEnv.java | 9 +- .../services/org.apache.lucene.codecs.Codec | 1 + .../org.apache.lucene.codecs.DocValuesFormat | 1 + 23 files changed, 414 insertions(+), 74 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java rename lucene/facet/src/java/org/apache/lucene/facet/codecs/{facet42/Facet42Codec.java => facet45/Facet45Codec.java} (85%) create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html rename lucene/{core => test-framework}/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (94%) create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java create mode 100644 
lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java index 4ec3bd833a4..0ce97ba76e6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java @@ -17,7 +17,10 @@ package org.apache.lucene.codecs.lucene42; * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FilterCodec; @@ -32,6 +35,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.apache.lucene.index.SegmentWriteState; /** * Implements the Lucene 4.2 index format, with configurable per-field postings @@ -42,10 +46,12 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; * * @see org.apache.lucene.codecs.lucene42 package documentation for file format details. * @lucene.experimental + * @deprecated Only for reading old 4.2 segments */ // NOTE: if we make largish changes in a minor release, easier to just make Lucene43Codec or whatever // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader // (it writes a minor version, etc). +@Deprecated public class Lucene42Codec extends Codec { private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat(); private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat(); @@ -129,10 +135,15 @@ public class Lucene42Codec extends Codec { private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene42"); - private final NormsFormat normsFormat = new Lucene42NormsFormat(); + private final NormsFormat normsFormat = new Lucene42NormsFormat() { + @Override + public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { + throw new UnsupportedOperationException("this codec can only be used for reading"); + } + }; @Override - public final NormsFormat normsFormat() { + public NormsFormat normsFormat() { return normsFormat; } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java index 55bf8097561..00f18606a49 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java @@ -123,8 +123,10 @@ import org.apache.lucene.util.packed.BlockPackedWriter; *

* <ul> *   <li>Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length. * </ul>
    + * @deprecated Only for reading old 4.2 segments */ -public final class Lucene42DocValuesFormat extends DocValuesFormat { +@Deprecated +public class Lucene42DocValuesFormat extends DocValuesFormat { /** Maximum length for each binary doc values field. */ public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2; @@ -154,8 +156,7 @@ public final class Lucene42DocValuesFormat extends DocValuesFormat { @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - // note: we choose DEFAULT here (its reasonably fast, and for small bpv has tiny waste) - return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); + throw new UnsupportedOperationException("this codec can only be used for reading"); } @Override @@ -163,8 +164,8 @@ public final class Lucene42DocValuesFormat extends DocValuesFormat { return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); } - private static final String DATA_CODEC = "Lucene42DocValuesData"; - private static final String DATA_EXTENSION = "dvd"; - private static final String METADATA_CODEC = "Lucene42DocValuesMetadata"; - private static final String METADATA_EXTENSION = "dvm"; + static final String DATA_CODEC = "Lucene42DocValuesData"; + static final String DATA_EXTENSION = "dvd"; + static final String METADATA_CODEC = "Lucene42DocValuesMetadata"; + static final String METADATA_EXTENSION = "dvm"; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java index c2a95fb8195..7b111c53e5f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java @@ -17,11 +17,6 @@ package org.apache.lucene.codecs.lucene42; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.TABLE_COMPRESSED; -import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.UNCOMPRESSED; - import java.io.IOException; import java.util.Comparator; import java.util.HashMap; @@ -78,6 +73,22 @@ class Lucene42DocValuesProducer extends DocValuesProducer { new HashMap>(); private final int maxDoc; + + + static final byte NUMBER = 0; + static final byte BYTES = 1; + static final byte FST = 2; + + static final int BLOCK_SIZE = 4096; + + static final byte DELTA_COMPRESSED = 0; + static final byte TABLE_COMPRESSED = 1; + static final byte UNCOMPRESSED = 2; + static final byte GCD_COMPRESSED = 3; + + static final int VERSION_START = 0; + static final int VERSION_GCD_COMPRESSION = 1; + static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.getDocCount(); @@ -88,8 +99,8 @@ class Lucene42DocValuesProducer extends DocValuesProducer { final int version; try { version = CodecUtil.checkHeader(in, metaCodec, - Lucene42DocValuesConsumer.VERSION_START, - Lucene42DocValuesConsumer.VERSION_CURRENT); + VERSION_START, + VERSION_CURRENT); numerics = new HashMap(); binaries = new HashMap(); fsts = new HashMap(); @@ -109,8 +120,8 @@ class Lucene42DocValuesProducer extends DocValuesProducer { String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.openInput(dataName, state.context); final int version2 = CodecUtil.checkHeader(data, dataCodec, - Lucene42DocValuesConsumer.VERSION_START, - Lucene42DocValuesConsumer.VERSION_CURRENT); + VERSION_START, + VERSION_CURRENT); if (version != version2) { throw new CorruptIndexException("Format versions mismatch"); } @@ -127,7 +138,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer { int fieldNumber = meta.readVInt(); while (fieldNumber != -1) { int fieldType = meta.readByte(); - if (fieldType == Lucene42DocValuesConsumer.NUMBER) { + if (fieldType == NUMBER) { NumericEntry entry = new NumericEntry(); entry.offset = meta.readLong(); entry.format = meta.readByte(); @@ -140,11 +151,11 @@ class Lucene42DocValuesProducer extends DocValuesProducer { default: throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); } - if (entry.format != Lucene42DocValuesConsumer.UNCOMPRESSED) { + if (entry.format != UNCOMPRESSED) { entry.packedIntsVersion = meta.readVInt(); } numerics.put(fieldNumber, entry); - } else if (fieldType == Lucene42DocValuesConsumer.BYTES) { + } else if (fieldType == BYTES) { BinaryEntry entry = new BinaryEntry(); entry.offset = meta.readLong(); entry.numBytes = meta.readLong(); @@ -155,7 +166,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer { entry.blockSize = meta.readVInt(); } binaries.put(fieldNumber, entry); - } else if (fieldType == Lucene42DocValuesConsumer.FST) { + } else if (fieldType == FST) { FSTEntry entry = new FSTEntry(); entry.offset = meta.readLong(); entry.numOrds = meta.readVLong(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java new file 
mode 100644 index 00000000000..797dd807992 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java @@ -0,0 +1,209 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.MathUtil; +import org.apache.lucene.util.packed.BlockPackedWriter; +import org.apache.lucene.util.packed.PackedInts.FormatAndBits; +import org.apache.lucene.util.packed.PackedInts; + +/** + * Writer for {@link Lucene42NormsFormat} + */ +class Lucene42NormsConsumer extends DocValuesConsumer { + static final int VERSION_START = 0; + static final int VERSION_GCD_COMPRESSION = 1; + static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; + + static final byte NUMBER = 0; + + static final int BLOCK_SIZE = 4096; + + static final byte DELTA_COMPRESSED = 0; + static final byte TABLE_COMPRESSED = 1; + static final byte UNCOMPRESSED = 2; + static final byte GCD_COMPRESSED = 3; + + final IndexOutput data, meta; + final int maxDoc; + final float acceptableOverheadRatio; + + Lucene42NormsConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, float acceptableOverheadRatio) throws IOException { + this.acceptableOverheadRatio = acceptableOverheadRatio; + maxDoc = state.segmentInfo.getDocCount(); + boolean success = false; + try { + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.createOutput(dataName, state.context); + CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT); + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + meta = state.directory.createOutput(metaName, state.context); + CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public void addNumericField(FieldInfo field, Iterable values) throws IOException { + meta.writeVInt(field.number); + meta.writeByte(NUMBER); + meta.writeLong(data.getFilePointer()); + long minValue = Long.MAX_VALUE; + long maxValue = Long.MIN_VALUE; + long gcd = 0; + // TODO: more efficient? 
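// (Editorial sketch, not part of the patch.) The single pass below gathers min/max, the
// running gcd of deltas, and up to 256 unique values, then picks UNCOMPRESSED,
// TABLE_COMPRESSED, GCD_COMPRESSED or DELTA_COMPRESSED. Worked GCD arithmetic with
// made-up norms {100, 130, 160} (an input this small would really take the table path;
// only the arithmetic is illustrated):
//   minValue = 100, gcd = gcd(130-100, 160-100) = 30  ->  block-packed quotients {0, 1, 2}
// and the read side (see the disk producer earlier in this patch) reconstructs:
//   long value = minValue + gcd * quotientReader.get(id);   // 100 + 30*2 == 160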
+ HashSet uniqueValues = null; + if (true) { + uniqueValues = new HashSet<>(); + + long count = 0; + for (Number nv : values) { + assert nv != null; + final long v = nv.longValue(); + + if (gcd != 1) { + if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { + // in that case v - minValue might overflow and make the GCD computation return + // wrong results. Since these extreme values are unlikely, we just discard + // GCD computation for them + gcd = 1; + } else if (count != 0) { // minValue needs to be set first + gcd = MathUtil.gcd(gcd, v - minValue); + } + } + + minValue = Math.min(minValue, v); + maxValue = Math.max(maxValue, v); + + if (uniqueValues != null) { + if (uniqueValues.add(v)) { + if (uniqueValues.size() > 256) { + uniqueValues = null; + } + } + } + + ++count; + } + assert count == maxDoc; + } + + if (uniqueValues != null) { + // small number of unique values + final int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1); + FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio); + if (formatAndBits.bitsPerValue == 8 && minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) { + meta.writeByte(UNCOMPRESSED); // uncompressed + for (Number nv : values) { + data.writeByte(nv == null ? 0 : (byte) nv.longValue()); + } + } else { + meta.writeByte(TABLE_COMPRESSED); // table-compressed + Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]); + final HashMap encode = new HashMap(); + data.writeVInt(decode.length); + for (int i = 0; i < decode.length; i++) { + data.writeLong(decode[i]); + encode.put(decode[i], i); + } + + meta.writeVInt(PackedInts.VERSION_CURRENT); + data.writeVInt(formatAndBits.format.getId()); + data.writeVInt(formatAndBits.bitsPerValue); + + final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE); + for(Number nv : values) { + writer.add(encode.get(nv == null ? 0 : nv.longValue())); + } + writer.finish(); + } + } else if (gcd != 0 && gcd != 1) { + meta.writeByte(GCD_COMPRESSED); + meta.writeVInt(PackedInts.VERSION_CURRENT); + data.writeLong(minValue); + data.writeLong(gcd); + data.writeVInt(BLOCK_SIZE); + + final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); + for (Number nv : values) { + long value = nv == null ? 0 : nv.longValue(); + writer.add((value - minValue) / gcd); + } + writer.finish(); + } else { + meta.writeByte(DELTA_COMPRESSED); // delta-compressed + + meta.writeVInt(PackedInts.VERSION_CURRENT); + data.writeVInt(BLOCK_SIZE); + + final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); + for (Number nv : values) { + writer.add(nv == null ? 
0 : nv.longValue()); + } + writer.finish(); + } + } + + @Override + public void close() throws IOException { + boolean success = false; + try { + if (meta != null) { + meta.writeVInt(-1); // write EOF marker + } + success = true; + } finally { + if (success) { + IOUtils.close(data, meta); + } else { + IOUtils.closeWhileHandlingException(data, meta); + } + } + } + + @Override + public void addBinaryField(FieldInfo field, final Iterable values) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void addSortedSetField(FieldInfo field, Iterable values, final Iterable docToOrdCount, final Iterable ords) throws IOException { + throw new UnsupportedOperationException(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java index a7c8c1a2aa7..a4571ec6237 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java @@ -41,7 +41,7 @@ import org.apache.lucene.util.packed.PackedInts; * * @see Lucene42DocValuesFormat */ -public final class Lucene42NormsFormat extends NormsFormat { +public class Lucene42NormsFormat extends NormsFormat { final float acceptableOverheadRatio; /** @@ -67,7 +67,7 @@ public final class Lucene42NormsFormat extends NormsFormat { @Override public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { - return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); + return new Lucene42NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java index d86002eb0af..e8885927644 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java @@ -24,7 +24,7 @@ import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase; * Tests Lucene42DocValuesFormat */ public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatTestCase { - private final Codec codec = new Lucene42Codec(); + private final Codec codec = new Lucene42RWCodec(); @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index ca851727ecf..8d9529a35bc 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -74,7 +74,7 @@ import org.junit.Ignore; // we won't even be running the actual code, only the impostor // @SuppressCodecs("Lucene4x") // Sep codec cannot yet handle the offsets in our 4.x index! 
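// (Editorial note, not part of the patch.) "Lucene42" joins the suppressed codecs because
// this commit makes the 4.2 formats read-only in core (fieldsConsumer now throws
// UnsupportedOperationException); tests that must still write 4.2 segments use the
// test-framework impersonators added below. Sketch of the RW override, with the body
// assumed from the moved consumer (constants as declared in Lucene42DocValuesFormat):
//   public class Lucene42RWDocValuesFormat extends Lucene42DocValuesFormat {
//     @Override
//     public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
//       return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION,
//                                            METADATA_CODEC, METADATA_EXTENSION,
//                                            PackedInts.DEFAULT); // overhead ratio: assumed
//     }
//   }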
-@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41"}) +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Lucene42"}) public class TestBackwardsCompatibility extends LuceneTestCase { // Uncomment these cases & run them on an older Lucene version, diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/Facet45Codec.java similarity index 85% rename from lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java rename to lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/Facet45Codec.java index f8984770a41..9ff508429dc 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/Facet45Codec.java @@ -1,4 +1,4 @@ -package org.apache.lucene.facet.codecs.facet42; +package org.apache.lucene.facet.codecs.facet45; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -21,12 +21,13 @@ import java.util.HashSet; import java.util.Set; import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; /** - * Same as {@link Lucene42Codec} except it uses {@link Facet42DocValuesFormat} + * Same as {@link Lucene45Codec} except it uses {@link Facet42DocValuesFormat} * for facet fields (faster-but-more-RAM-consuming doc values). * *

    @@ -42,16 +43,14 @@ import org.apache.lucene.facet.params.FacetIndexingParams; * * @lucene.experimental */ -// nocommit -public class Facet42Codec extends Lucene42Codec { +public class Facet45Codec extends Lucene45Codec { private final Set facetFields; private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facet42"); - private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42"); // must have that for SPI purposes /** Default constructor, uses {@link FacetIndexingParams#DEFAULT}. */ - public Facet42Codec() { + public Facet45Codec() { this(FacetIndexingParams.DEFAULT); } @@ -60,7 +59,7 @@ public class Facet42Codec extends Lucene42Codec { * {@link DocValuesFormat} for the fields that are returned by * {@link FacetIndexingParams#getAllCategoryListParams()}. */ - public Facet42Codec(FacetIndexingParams fip) { + public Facet45Codec(FacetIndexingParams fip) { if (fip.getPartitionSize() != Integer.MAX_VALUE) { throw new IllegalArgumentException("this Codec does not support partitions"); } @@ -75,8 +74,7 @@ public class Facet42Codec extends Lucene42Codec { if (facetFields.contains(field)) { return facetsDVFormat; } else { - return lucene42DVFormat; + return super.getDocValuesFormatForField(field); } } - } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html new file mode 100644 index 00000000000..c752b963484 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html @@ -0,0 +1,22 @@ + + + + +Codec + DocValuesFormat that are optimized for facets. + + diff --git a/lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 343b4cdb07e..d6e8c740686 100644 --- a/lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
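# (Editorial note, not part of the patch.) Codecs are discovered by name through this SPI
# file: a segment records the name of the codec that wrote it, and opening the segment
# resolves that name via Codec.forName, which only works once the class is registered here.
# Write-side usage, mirroring TestDemoFacets below:
#   IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
#   iwc.setCodec(new Facet45Codec());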
-org.apache.lucene.facet.codecs.facet42.Facet42Codec +org.apache.lucene.facet.codecs.facet45.Facet45Codec diff --git a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java index 31e79edbf14..6bf9fe6e009 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java @@ -3,7 +3,7 @@ package org.apache.lucene.facet; import java.util.Random; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.facet.codecs.facet42.Facet42Codec; +import org.apache.lucene.facet.codecs.facet45.Facet45Codec; import org.apache.lucene.facet.encoding.DGapIntEncoder; import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder; import org.apache.lucene.facet.encoding.EightFlagsIntEncoder; @@ -53,7 +53,7 @@ public abstract class FacetTestCase extends LuceneTestCase { public static void beforeClassFacetTestCase() throws Exception { if (random().nextDouble() < 0.3) { savedDefault = Codec.getDefault(); // save to restore later - Codec.setDefault(new Facet42Codec()); + Codec.setDefault(new Facet45Codec()); } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java index 0cf73fcdb23..d0e65772160 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java @@ -31,7 +31,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetTestUtils; -import org.apache.lucene.facet.codecs.facet42.Facet42Codec; +import org.apache.lucene.facet.codecs.facet45.Facet45Codec; import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; @@ -260,7 +260,7 @@ public class TestDemoFacets extends FacetTestCase { Directory dir = newDirectory(); Directory taxoDir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setCodec(new Facet42Codec()); + iwc.setCodec(new Facet45Codec()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java index 8b2ed06e9e3..f973648aaeb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java @@ -1,9 +1,7 @@ package org.apache.lucene.codecs.compressing; -import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; // nocommit -import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; // nocommit +import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; import org.apache.lucene.util.packed.PackedInts; /* @@ -42,9 +40,4 @@ public class FastCompressingCodec extends CompressingCodec { public NormsFormat normsFormat() { return new Lucene42NormsFormat(PackedInts.FAST); } - - @Override - public 
DocValuesFormat docValuesFormat() { - return new Lucene42DocValuesFormat(PackedInts.FAST); - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java index 5b6385e0d36..d7c0451bcba 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java @@ -1,12 +1,9 @@ package org.apache.lucene.codecs.compressing; -import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; import org.apache.lucene.util.packed.PackedInts; -// nocommit /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -43,9 +40,4 @@ public class FastDecompressionCompressingCodec extends CompressingCodec { public NormsFormat normsFormat() { return new Lucene42NormsFormat(PackedInts.DEFAULT); } - - @Override - public DocValuesFormat docValuesFormat() { - return new Lucene42DocValuesFormat(PackedInts.DEFAULT); - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java index 6b4b1091b97..2f1fc293592 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java @@ -21,7 +21,6 @@ import org.apache.lucene.util.packed.PackedInts; * limitations under the License. 
*/ -// nocommit /** CompressionCodec that uses {@link CompressionMode#HIGH_COMPRESSION} */ public class HighCompressionCompressingCodec extends CompressingCodec { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java similarity index 94% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java index 0a2f92f22a8..ac8aeafd33c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java @@ -46,25 +46,20 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts.FormatAndBits; import org.apache.lucene.util.packed.PackedInts; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BYTES; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.NUMBER; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.FST; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.TABLE_COMPRESSED; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.UNCOMPRESSED; + /** * Writer for {@link Lucene42DocValuesFormat} */ class Lucene42DocValuesConsumer extends DocValuesConsumer { - static final int VERSION_START = 0; - static final int VERSION_GCD_COMPRESSION = 1; - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; - - static final byte NUMBER = 0; - static final byte BYTES = 1; - static final byte FST = 2; - - static final int BLOCK_SIZE = 4096; - - static final byte DELTA_COMPRESSED = 0; - static final byte TABLE_COMPRESSED = 1; - static final byte UNCOMPRESSED = 2; - static final byte GCD_COMPRESSED = 3; - final IndexOutput data, meta; final int maxDoc; final float acceptableOverheadRatio; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java new file mode 100644 index 00000000000..5ef50ea522f --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java @@ -0,0 +1,39 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.NormsFormat; + +/** + * Read-write version of {@link Lucene42Codec} for testing. + */ +public class Lucene42RWCodec extends Lucene42Codec { + private static final DocValuesFormat dv = new Lucene42RWDocValuesFormat(); + private static final NormsFormat norms = new Lucene42NormsFormat(); + + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return dv; + } + + @Override + public NormsFormat normsFormat() { + return norms; + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java new file mode 100644 index 00000000000..45372246184 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java @@ -0,0 +1,35 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.index.SegmentWriteState; + +/** + * Read-write version of {@link Lucene42DocValuesFormat} for testing. + */ +public class Lucene42RWDocValuesFormat extends Lucene42DocValuesFormat { + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + // note: we choose DEFAULT here (it's reasonably fast, and for small bpv has tiny waste) + return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html new file mode 100644 index 00000000000..f1c62d1e049 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html @@ -0,0 +1,25 @@ +Support for testing {@link org.apache.lucene.codecs.lucene42.Lucene42Codec}.
\ No newline at end of file diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 008eeea7d32..c6a0bf393cc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -2498,6 +2498,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { d.close(); } + // nocommit: get this out of here and into the deprecated codecs (4.0, 4.2) public void testHugeBinaryValueLimit() throws Exception { // We only test DVFormats that have a limit assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field")); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java index 1ab8c818b7f..54159212fab 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java @@ -40,6 +40,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat; import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; @@ -147,7 +148,6 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { savedCodec = Codec.getDefault(); int randomVal = random.nextInt(10); - // nocommit: 4.2 impersonator if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && "random".equals(TEST_DOCVALUESFORMAT) && @@ -163,6 +163,13 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { !shouldAvoidCodec("Lucene41"))) { codec = Codec.forName("Lucene41"); assert codec instanceof Lucene41RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; + } else if ("Lucene42".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && + "random".equals(TEST_POSTINGSFORMAT) && + "random".equals(TEST_DOCVALUESFORMAT) && + randomVal == 2 && + !shouldAvoidCodec("Lucene42"))) { + codec = Codec.forName("Lucene42"); + assert codec instanceof Lucene42RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; } else if (("random".equals(TEST_POSTINGSFORMAT) == false) || ("random".equals(TEST_DOCVALUESFORMAT) == false)) { // the user wired postings or DV: this is messy // refactor into RandomCodec....
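With the Lucene42 writer relocated into the test framework, back-compat tests can still produce 4.2-format indexes even though the shipping codec is now read-only. A minimal sketch of forcing the impersonator explicitly instead of relying on the random rotation above (hypothetical test body, using the usual test-framework helpers):

  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setCodec(new Lucene42RWCodec());  // writes the 4.2 format, which cannot represent missing values
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 5L));
  writer.addDocument(doc);
  writer.close();
  dir.close();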
diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 9bd5ca87fd7..554bb0fbe07 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -21,3 +21,4 @@ org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec org.apache.lucene.codecs.lucene40.Lucene40RWCodec org.apache.lucene.codecs.lucene41.Lucene41RWCodec +org.apache.lucene.codecs.lucene42.Lucene42RWCodec diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index d1798334486..2086be1f0e7 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -14,3 +14,4 @@ # limitations under the License. org.apache.lucene.codecs.asserting.AssertingDocValuesFormat +org.apache.lucene.codecs.lucene42.Lucene42RWDocValuesFormat From 98522b2262eccebc56daed45cc3245461962e1c2 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 19 Aug 2013 16:28:03 +0000 Subject: [PATCH 05/16] support missing for 4.5 and disk dv git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515496 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/diskdv/DiskDocValuesProducer.java | 413 +----------------- .../simpletext/SimpleTextDocValuesReader.java | 8 +- .../lucene45/Lucene45DocValuesConsumer.java | 83 ++-- .../lucene45/Lucene45DocValuesProducer.java | 127 ++++-- .../apache/lucene/index/SortedDocValues.java | 2 +- .../apache/lucene/search/TestFieldCache.java | 6 +- .../lucene/index/AssertingAtomicReader.java | 4 +- .../index/BaseDocValuesFormatTestCase.java | 5 +- .../apache/lucene/util/LuceneTestCase.java | 7 +- 9 files changed, 167 insertions(+), 488 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java index 41d2e87b9fe..4972706ed67 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java @@ -17,427 +17,34 @@ package org.apache.lucene.codecs.diskdv; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED; - import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer; -import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.codecs.lucene45.Lucene45DocValuesProducer; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; -import org.apache.lucene.util.packed.PackedInts; -class DiskDocValuesProducer extends DocValuesProducer { - private final Map numerics; - private final Map ords; - private final Map ordIndexes; - private final Map binaries; - private final IndexInput data; - private final int maxDoc; - +class DiskDocValuesProducer extends Lucene45DocValuesProducer { + DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { - String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); - this.maxDoc = state.segmentInfo.getDocCount(); - // read in the entries from the metadata file. 
- IndexInput in = state.directory.openInput(metaName, state.context); - boolean success = false; - final int version; - try { - version = CodecUtil.checkHeader(in, metaCodec, - Lucene45DocValuesFormat.VERSION_CURRENT, - Lucene45DocValuesFormat.VERSION_CURRENT); - numerics = new HashMap(); - ords = new HashMap(); - ordIndexes = new HashMap(); - binaries = new HashMap(); - readFields(in); - - success = true; - } finally { - if (success) { - IOUtils.close(in); - } else { - IOUtils.closeWhileHandlingException(in); - } - } - - success = false; - try { - String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); - data = state.directory.openInput(dataName, state.context); - final int version2 = CodecUtil.checkHeader(data, dataCodec, - Lucene45DocValuesFormat.VERSION_CURRENT, - Lucene45DocValuesFormat.VERSION_CURRENT); - if (version != version2) { - throw new CorruptIndexException("Versions mismatch"); - } - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(this.data); - } - } - - } - - private void readFields(IndexInput meta) throws IOException { - int fieldNumber = meta.readVInt(); - while (fieldNumber != -1) { - byte type = meta.readByte(); - if (type == Lucene45DocValuesFormat.NUMERIC) { - numerics.put(fieldNumber, readNumericEntry(meta)); - } else if (type == Lucene45DocValuesFormat.BINARY) { - BinaryEntry b = readBinaryEntry(meta); - binaries.put(fieldNumber, b); - } else if (type == Lucene45DocValuesFormat.SORTED) { - // sorted = binary + numeric - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != Lucene45DocValuesFormat.BINARY) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - BinaryEntry b = readBinaryEntry(meta); - binaries.put(fieldNumber, b); - - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); - } - NumericEntry n = readNumericEntry(meta); - ords.put(fieldNumber, n); - } else if (type == Lucene45DocValuesFormat.SORTED_SET) { - // sortedset = binary + numeric + ordIndex - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != Lucene45DocValuesFormat.BINARY) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - BinaryEntry b = readBinaryEntry(meta); - binaries.put(fieldNumber, b); - - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - NumericEntry n1 = readNumericEntry(meta); - ords.put(fieldNumber, n1); - - if (meta.readVInt() != fieldNumber) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) { - throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); - } - NumericEntry n2 = readNumericEntry(meta); - ordIndexes.put(fieldNumber, n2); - } else { - throw new 
CorruptIndexException("invalid type: " + type + ", resource=" + meta); - } - fieldNumber = meta.readVInt(); - } - } - - static NumericEntry readNumericEntry(IndexInput meta) throws IOException { - NumericEntry entry = new NumericEntry(); - entry.format = meta.readVInt(); - entry.packedIntsVersion = meta.readVInt(); - entry.offset = meta.readLong(); - entry.count = meta.readVLong(); - entry.blockSize = meta.readVInt(); - switch(entry.format) { - case GCD_COMPRESSED: - entry.minValue = meta.readLong(); - entry.gcd = meta.readLong(); - break; - case TABLE_COMPRESSED: - if (entry.count > Integer.MAX_VALUE) { - throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta); - } - final int uniqueValues = meta.readVInt(); - if (uniqueValues > 256) { - throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta); - } - entry.table = new long[uniqueValues]; - for (int i = 0; i < uniqueValues; ++i) { - entry.table[i] = meta.readLong(); - } - break; - case DELTA_COMPRESSED: - break; - default: - throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); - } - return entry; - } - - static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { - BinaryEntry entry = new BinaryEntry(); - int format = meta.readVInt(); - if (format != Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED && format != Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED) { - throw new CorruptIndexException("Unexpected format for binary entry: " + format + ", input=" + meta); - } - entry.minLength = meta.readVInt(); - entry.maxLength = meta.readVInt(); - entry.count = meta.readVLong(); - entry.offset = meta.readLong(); - if (entry.minLength != entry.maxLength) { - entry.addressesOffset = meta.readLong(); - entry.packedIntsVersion = meta.readVInt(); - entry.blockSize = meta.readVInt(); - } - return entry; + super(state, dataCodec, dataExtension, metaCodec, metaExtension); } @Override - public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.number); - return getNumeric(field, entry); - } - - private LongNumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException { - final IndexInput data = this.data.clone(); - data.seek(entry.offset); - - switch (entry.format) { - case DELTA_COMPRESSED: - final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - return new LongNumericDocValues() { - @Override - public long get(long id) { - return reader.get(id); - } - }; - case GCD_COMPRESSED: - final long min = entry.minValue; - final long mult = entry.gcd; - final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - return new LongNumericDocValues() { - @Override - public long get(long id) { - return min + mult * quotientReader.get(id); - } - }; - case TABLE_COMPRESSED: - final long[] table = entry.table; - final int bitsRequired = PackedInts.bitsRequired(table.length - 1); - final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired); - return new LongNumericDocValues() { - @Override - long get(long id) { - return table[(int) ords.get((int) id)]; - } - }; - default: - throw new AssertionError(); - } - } - - @Override - public BinaryDocValues getBinary(FieldInfo field) throws IOException 
{ - BinaryEntry bytes = binaries.get(field.number); - if (bytes.minLength == bytes.maxLength) { - return getFixedBinary(field, bytes); - } else { - return getVariableBinary(field, bytes); - } - } - - private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) { - final IndexInput data = this.data.clone(); - - return new LongBinaryDocValues() { - @Override - public void get(long id, BytesRef result) { - long address = bytes.offset + id * bytes.maxLength; - try { - data.seek(address); - // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) - // assume "they" own the bytes after calling this! - final byte[] buffer = new byte[bytes.maxLength]; - data.readBytes(buffer, 0, buffer.length); - result.bytes = buffer; - result.offset = 0; - result.length = buffer.length; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - }; - } - - private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { - final IndexInput data = this.data.clone(); + protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException { data.seek(bytes.addressesOffset); - - final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true); - return new LongBinaryDocValues() { - @Override - public void get(long id, BytesRef result) { - long startAddress = bytes.offset + (id == 0 ? 0 : + addresses.get(id-1)); - long endAddress = bytes.offset + addresses.get(id); - int length = (int) (endAddress - startAddress); - try { - data.seek(startAddress); - // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) - // assume "they" own the bytes after calling this! 
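// Aside (sketch, not part of the patch): the ownership comment above is why each
// lookup allocates a fresh byte[]. Callers cache the returned bytes across calls,
// so with a shared buffer (assuming some BinaryDocValues "values"):
BytesRef first = new BytesRef();
BytesRef second = new BytesRef();
values.get(0, first);   // first.bytes points at a freshly allocated array
values.get(1, second);  // a shared buffer would rewrite first's contents here,
                        // silently corrupting anything that cached it (e.g. comparators)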
- final byte[] buffer = new byte[length]; - data.readBytes(buffer, 0, buffer.length); - result.bytes = buffer; - result.offset = 0; - result.length = length; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - }; + return new MonotonicBlockPackedReader(data.clone(), bytes.packedIntsVersion, bytes.blockSize, bytes.count, true); } @Override - public SortedDocValues getSorted(FieldInfo field) throws IOException { - final int valueCount = (int) binaries.get(field.number).count; - final BinaryDocValues binary = getBinary(field); - final NumericDocValues ordinals = getNumeric(field, ords.get(field.number)); - return new SortedDocValues() { - - @Override - public int getOrd(int docID) { - return (int) ordinals.get(docID); - } - - @Override - public void lookupOrd(int ord, BytesRef result) { - binary.get(ord, result); - } - - @Override - public int getValueCount() { - return valueCount; - } - }; + protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException { + throw new AssertionError(); } @Override - public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - final long valueCount = binaries.get(field.number).count; - final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); - final LongNumericDocValues ordinals = getNumeric(field, ords.get(field.number)); - NumericEntry entry = ordIndexes.get(field.number); - IndexInput data = this.data.clone(); + protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException { data.seek(entry.offset); - final MonotonicBlockPackedReader ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); - - return new SortedSetDocValues() { - long offset; - long endOffset; - - @Override - public long nextOrd() { - if (offset == endOffset) { - return NO_MORE_ORDS; - } else { - long ord = ordinals.get(offset); - offset++; - return ord; - } - } - - @Override - public void setDocument(int docID) { - offset = (docID == 0 ? 
0 : ordIndex.get(docID-1)); - endOffset = ordIndex.get(docID); - } - - @Override - public void lookupOrd(long ord, BytesRef result) { - binary.get(ord, result); - } - - @Override - public long getValueCount() { - return valueCount; - } - }; - } - - @Override - public Bits getDocsWithField(FieldInfo field) throws IOException { - if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { - return new SortedSetDocsWithField(getSortedSet(field), maxDoc); - } else { - return new Bits.MatchAllBits(maxDoc); - } - } - - @Override - public void close() throws IOException { - data.close(); - } - - static class NumericEntry { - long offset; - - int format; - int packedIntsVersion; - long count; - int blockSize; - - long minValue; - long gcd; - long table[]; - } - - static class BinaryEntry { - long offset; - - long count; - int minLength; - int maxLength; - long addressesOffset; - int packedIntsVersion; - int blockSize; - } - - // internally we compose complex dv (sorted/sortedset) from other ones - static abstract class LongNumericDocValues extends NumericDocValues { - @Override - public final long get(int docID) { - return get((long) docID); - } - - abstract long get(long id); - } - - static abstract class LongBinaryDocValues extends BinaryDocValues { - @Override - public final void get(int docID, BytesRef result) { - get((long)docID, result); - } - - abstract void get(long id, BytesRef Result); + return new MonotonicBlockPackedReader(data.clone(), entry.packedIntsVersion, entry.blockSize, entry.count, true); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index c625f4ea2e8..9ead984bb84 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -321,12 +321,8 @@ class SimpleTextDocValuesReader extends DocValuesProducer { @Override public void lookupOrd(int ord, BytesRef result) { try { - if (ord == -1) { - result.length = 0; - return; - } - if (ord < -1 || ord >= field.numValues) { - throw new IndexOutOfBoundsException("ord must be -1 .. " + (field.numValues-1) + "; got " + ord); + if (ord < 0 || ord >= field.numValues) { + throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues-1) + "; got " + ord); } in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength)); SimpleTextUtil.readLine(in, scratch); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java index 942ee228045..21ee03075f5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java @@ -23,7 +23,6 @@ import java.util.HashSet; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.MissingOrdRemapper; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; @@ -88,14 +87,20 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { long minValue = Long.MAX_VALUE; long maxValue = Long.MIN_VALUE; long gcd = 0; + boolean missing = false; // TODO: more efficient? 
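// Aside (worked example, not in the patch): the gcd computed over the values in the
// loop that follows enables GCD_COMPRESSED, which stores only (value - minValue) / gcd
// per document. For millisecond timestamps that are all whole seconds:
long gcd = 1000L;
long minValue = 1400000000000L;
long value = 1400000123000L;
long encoded = (value - minValue) / gcd;   // 123000 / 1000 = 123: far fewer bits to pack
long decoded = minValue + gcd * encoded;   // the read side reverses it exactly
assert decoded == value;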
HashSet uniqueValues = null; if (optimizeStorage) { uniqueValues = new HashSet<>(); - // nocommit: impl null values (ideally smartly) for (Number nv : values) { - final long v = nv == null ? 0 : nv.longValue(); + final long v; + if (nv == null) { + v = 0; + missing = true; + } else { + v = nv.longValue(); + } if (gcd != 1) { if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { @@ -142,6 +147,12 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { meta.writeVInt(field.number); meta.writeByte(Lucene45DocValuesFormat.NUMERIC); meta.writeVInt(format); + if (missing) { + meta.writeLong(data.getFilePointer()); + writeMissingBitset(values); + } else { + meta.writeLong(-1L); + } meta.writeVInt(PackedInts.VERSION_CURRENT); meta.writeLong(data.getFilePointer()); meta.writeVLong(count); @@ -184,6 +195,27 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { throw new AssertionError(); } } + + // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on, + // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode) + void writeMissingBitset(Iterable values) throws IOException { + byte bits = 0; + int count = 0; + for (Object v : values) { + if (count == 8) { + data.writeByte(bits); + count = 0; + bits = 0; + } + if (v != null) { + bits |= 1 << (count & 7); + } + count++; + } + if (count > 0) { + data.writeByte(bits); + } + } @Override public void addBinaryField(FieldInfo field, Iterable values) throws IOException { @@ -194,8 +226,15 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { int maxLength = Integer.MIN_VALUE; final long startFP = data.getFilePointer(); long count = 0; + boolean missing = false; for(BytesRef v : values) { - final int length = v == null ? 0 : v.length; + final int length; + if (v == null) { + length = 0; + missing = true; + } else { + length = v.length; + } minLength = Math.min(minLength, length); maxLength = Math.max(maxLength, length); if (v != null) { @@ -204,6 +243,12 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { count++; } meta.writeVInt(minLength == maxLength ? BINARY_FIXED_UNCOMPRESSED : BINARY_VARIABLE_UNCOMPRESSED); + if (missing) { + meta.writeLong(data.getFilePointer()); + writeMissingBitset(values); + } else { + meta.writeLong(-1L); + } meta.writeVInt(minLength); meta.writeVInt(maxLength); meta.writeVLong(count); @@ -244,6 +289,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { meta.writeVInt(field.number); meta.writeByte(Lucene45DocValuesFormat.BINARY); meta.writeVInt(BINARY_PREFIX_COMPRESSED); + meta.writeLong(-1L); // now write the bytes: sharing prefixes within a block final long startFP = data.getFilePointer(); // currently, we have to store the delta from expected for every 1/nth term @@ -286,34 +332,6 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { @Override public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { - // nocommit: remove this hack and support missing! - - // three cases for simulating the old writer: - // 1. no missing - // 2. missing (and empty string in use): remap ord=-1 -> ord=0 - // 3. 
missing (and empty string not in use): remap all ords +1, insert empty string into values - boolean anyMissing = false; - for (Number n : docToOrd) { - if (n.longValue() == -1) { - anyMissing = true; - break; - } - } - - boolean hasEmptyString = false; - for (BytesRef b : values) { - hasEmptyString = b.length == 0; - break; - } - - if (!anyMissing) { - // nothing to do - } else if (hasEmptyString) { - docToOrd = MissingOrdRemapper.mapMissingToOrd0(docToOrd); - } else { - docToOrd = MissingOrdRemapper.mapAllOrds(docToOrd); - values = MissingOrdRemapper.insertEmptyValue(values); - } meta.writeVInt(field.number); meta.writeByte(Lucene45DocValuesFormat.SORTED); addTermsDict(field, values); @@ -334,6 +352,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer { meta.writeVInt(field.number); meta.writeByte(Lucene45DocValuesFormat.NUMERIC); meta.writeVInt(DELTA_COMPRESSED); + meta.writeLong(-1L); meta.writeVInt(PackedInts.VERSION_CURRENT); meta.writeLong(data.getFilePointer()); meta.writeVLong(maxDoc); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java index b19a34e169c..b1ca3a8cf60 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java @@ -53,7 +53,7 @@ import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; import org.apache.lucene.util.packed.PackedInts; -class Lucene45DocValuesProducer extends DocValuesProducer { +public class Lucene45DocValuesProducer extends DocValuesProducer { private final Map numerics; private final Map binaries; private final Map ords; @@ -65,7 +65,7 @@ class Lucene45DocValuesProducer extends DocValuesProducer { private final Map addressInstances = new HashMap(); private final Map ordIndexInstances = new HashMap(); - Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { + protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. 
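// Aside (standalone sketch of the missing bitset written by writeMissingBitset above
// and consumed by getMissingBits further down; assumes the byte-aligned, LSB-first
// layout shown there, with missingOffset == -1 meaning "no document is missing"):
static byte[] writeBits(boolean[] hasValue) {
  byte[] bits = new byte[(hasValue.length + 7) >> 3];
  for (int i = 0; i < hasValue.length; i++) {
    if (hasValue[i]) {
      bits[i >> 3] |= 1 << (i & 7);   // doc i lives at byte (i >> 3), bit (i & 7)
    }
  }
  return bits;
}
static boolean hasValue(byte[] bits, int doc) {
  return (bits[doc >> 3] & (1 << (doc & 7))) != 0;  // mirrors getMissingBits
}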
IndexInput in = state.directory.openInput(metaName, state.context); @@ -176,6 +176,7 @@ class Lucene45DocValuesProducer extends DocValuesProducer { static NumericEntry readNumericEntry(IndexInput meta) throws IOException { NumericEntry entry = new NumericEntry(); entry.format = meta.readVInt(); + entry.missingOffset = meta.readLong(); entry.packedIntsVersion = meta.readVInt(); entry.offset = meta.readLong(); entry.count = meta.readVLong(); @@ -209,6 +210,7 @@ class Lucene45DocValuesProducer extends DocValuesProducer { static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { BinaryEntry entry = new BinaryEntry(); entry.format = meta.readVInt(); + entry.missingOffset = meta.readLong(); entry.minLength = meta.readVInt(); entry.maxLength = meta.readVInt(); entry.count = meta.readVLong(); @@ -315,9 +317,7 @@ class Lucene45DocValuesProducer extends DocValuesProducer { }; } - private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { - final IndexInput data = this.data.clone(); - + protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException { final MonotonicBlockPackedReader addresses; synchronized (addressInstances) { MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number); @@ -328,6 +328,13 @@ class Lucene45DocValuesProducer extends DocValuesProducer { } addresses = addrInstance; } + return addresses; + } + + private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { + final IndexInput data = this.data.clone(); + + final MonotonicBlockPackedReader addresses = getAddressInstance(data, field, bytes); return new LongBinaryDocValues() { @Override @@ -350,12 +357,10 @@ class Lucene45DocValuesProducer extends DocValuesProducer { } }; } - - private BinaryDocValues getCompressedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { - final IndexInput data = this.data.clone(); - final long interval = bytes.addressInterval; - + + protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException { final MonotonicBlockPackedReader addresses; + final long interval = bytes.addressInterval; synchronized (addressInstances) { MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number); if (addrInstance == null) { @@ -371,6 +376,14 @@ class Lucene45DocValuesProducer extends DocValuesProducer { } addresses = addrInstance; } + return addresses; + } + + + private BinaryDocValues getCompressedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { + final IndexInput data = this.data.clone(); + + final MonotonicBlockPackedReader addresses = getIntervalInstance(data, field, bytes); return new CompressedBinaryDocValues(bytes, addresses, data); } @@ -420,26 +433,30 @@ class Lucene45DocValuesProducer extends DocValuesProducer { } }; } - - @Override - public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - final long valueCount = binaries.get(field.number).count; - // we keep the byte[]s and list of ords on disk, these could be large - final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); - final LongNumericDocValues ordinals = getNumeric(ords.get(field.number)); - // but the addresses to the ord stream are in RAM + + protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException { final 
MonotonicBlockPackedReader ordIndex; synchronized (ordIndexInstances) { MonotonicBlockPackedReader ordIndexInstance = ordIndexInstances.get(field.number); if (ordIndexInstance == null) { - NumericEntry entry = ordIndexes.get(field.number); - IndexInput data = this.data.clone(); data.seek(entry.offset); ordIndexInstance = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false); ordIndexInstances.put(field.number, ordIndexInstance); } ordIndex = ordIndexInstance; } + return ordIndex; + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + final IndexInput data = this.data.clone(); + final long valueCount = binaries.get(field.number).count; + // we keep the byte[]s and list of ords on disk, these could be large + final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); + final LongNumericDocValues ordinals = getNumeric(ords.get(field.number)); + // but the addresses to the ord stream are in RAM + final MonotonicBlockPackedReader ordIndex = getOrdIndexInstance(data, field, ordIndexes.get(field.number)); return new SortedSetDocValues() { long offset; @@ -491,15 +508,47 @@ class Lucene45DocValuesProducer extends DocValuesProducer { } }; } + + public Bits getMissingBits(final long offset) throws IOException { + if (offset == -1) { + return new Bits.MatchAllBits(maxDoc); + } else { + final IndexInput in = data.clone(); + return new Bits() { + + @Override + public boolean get(int index) { + try { + in.seek(offset + (index >> 3)); + return (in.readByte() & (1 << (index & 7))) != 0; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public int length() { + return maxDoc; + } + }; + } + } @Override public Bits getDocsWithField(FieldInfo field) throws IOException { - // nocommit: only use this if the field's entry has missing values (write that), - // otherwise return MatchAllBits - if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { - return new SortedSetDocsWithField(getSortedSet(field), maxDoc); - } else { - return new Bits.MatchAllBits(maxDoc); + switch(field.getDocValuesType()) { + case SORTED_SET: + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + case SORTED: + return new SortedDocsWithField(getSorted(field), maxDoc); + case BINARY: + BinaryEntry be = binaries.get(field.number); + return getMissingBits(be.missingOffset); + case NUMERIC: + NumericEntry ne = numerics.get(field.number); + return getMissingBits(ne.missingOffset); + default: + throw new AssertionError(); } } @@ -508,30 +557,32 @@ class Lucene45DocValuesProducer extends DocValuesProducer { data.close(); } - static class NumericEntry { - long offset; + protected static class NumericEntry { + long missingOffset; + public long offset; - int format; - int packedIntsVersion; - long count; - int blockSize; + public int format; + public int packedIntsVersion; + public long count; + public int blockSize; long minValue; long gcd; long table[]; } - static class BinaryEntry { + protected static class BinaryEntry { + long missingOffset; long offset; int format; - long count; + public long count; int minLength; int maxLength; - long addressesOffset; - long addressInterval; - int packedIntsVersion; - int blockSize; + public long addressesOffset; + public long addressInterval; + public int packedIntsVersion; + public int blockSize; } // internally we compose complex dv (sorted/sortedset) from other ones diff --git 
a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java index 1968a791157..df36931a253 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java @@ -43,7 +43,7 @@ public abstract class SortedDocValues extends BinaryDocValues { public abstract int getOrd(int docID); /** Retrieves the value for the specified ordinal. - * @param ord ordinal to lookup + * @param ord ordinal to lookup (must be >= 0 and < {@link #getValueCount()}) * @param result will be populated with the ordinal's value * @see #getOrd(int) */ diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index da1cf218617..c36367277bb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -480,7 +480,7 @@ public class TestFieldCache extends LuceneTestCase { } catch (IllegalStateException expected) {} Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary"); - assertTrue(bits instanceof Bits.MatchAllBits); + assertTrue(bits.get(0)); // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() try { @@ -510,7 +510,7 @@ public class TestFieldCache extends LuceneTestCase { assertEquals(1, sortedSet.getValueCount()); bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted"); - assertTrue(bits instanceof Bits.MatchAllBits); + assertTrue(bits.get(0)); // Numeric type: can be retrieved via getInts() and so on Ints numeric = FieldCache.DEFAULT.getInts(ar, "numeric", false); @@ -537,7 +537,7 @@ public class TestFieldCache extends LuceneTestCase { } catch (IllegalStateException expected) {} bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric"); - assertTrue(bits instanceof Bits.MatchAllBits); + assertTrue(bits.get(0)); // SortedSet type: can be retrieved via getDocTermOrds() if (defaultCodecSupportsSortedSet()) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java index 6db5ccdeea3..eb6b20df5eb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java @@ -438,14 +438,14 @@ public class AssertingAtomicReader extends FilterAtomicReader { this.in = in; this.maxDoc = maxDoc; this.valueCount = in.getValueCount(); - assert valueCount >= 1 && valueCount <= maxDoc; + assert valueCount >= 0 && valueCount <= maxDoc; } @Override public int getOrd(int docID) { assert docID >= 0 && docID < maxDoc; int ord = in.getOrd(docID); - assert ord >= 0 && ord < valueCount; + assert ord >= -1 && ord < valueCount; return ord; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index c6a0bf393cc..8622c4ad9e1 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -696,7 +696,10 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { BytesRef scratch = new BytesRef(); dv.lookupOrd(dv.getOrd(0), scratch); assertEquals(new BytesRef("hello 
world 2"), scratch); - dv.lookupOrd(dv.getOrd(1), scratch); + if (codecSupportsDocsWithField("dv")) { + assertEquals(-1, dv.getOrd(1)); + } + dv.get(1, scratch); assertEquals(new BytesRef(""), scratch); ireader.close(); directory.close(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index f14f772e0df..27f736b8706 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -1372,8 +1372,11 @@ public abstract class LuceneTestCase extends Assert { /** Returns true if the codec for the field "supports" docsWithField * (other codecs return MatchAllBits, because you couldnt write missing values before) */ public static boolean codecSupportsDocsWithField(String field) { - // currently only one codec! - return _TestUtil.getDocValuesFormat(Codec.getDefault(), field).equals("SimpleText"); + String name = _TestUtil.getDocValuesFormat(Codec.getDefault(), field); + if (name.equals("Lucene40") || name.equals("Lucene42")) { + return false; + } + return true; } public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException { From d208878c116bb66d6cb114ce51f69b7264ab668a Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 19 Aug 2013 17:23:52 +0000 Subject: [PATCH 06/16] improve DV faceting tests, support missing count for single valued string fields, remove required/default restriction git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515520 13f79535-47bb-0310-9956-ffa450edef68 --- .../valuesource/BytesRefFieldSource.java | 3 +- .../index/BaseDocValuesFormatTestCase.java | 37 +++++++++++++++++++ .../apache/solr/request/DocValuesFacets.java | 11 ++---- .../apache/solr/request/NumericFacets.java | 2 +- .../java/org/apache/solr/schema/StrField.java | 3 -- .../org/apache/solr/schema/TrieField.java | 3 -- ...hema-docValues-not-required-no-default.xml | 33 ----------------- .../conf/schema-docValuesFaceting.xml | 9 ++--- .../org/apache/solr/TestRandomDVFaceting.java | 25 +++++++++---- .../solr/schema/BadIndexSchemaTest.java | 4 -- solr/example/solr/collection1/conf/schema.xml | 6 ++- 11 files changed, 69 insertions(+), 67 deletions(-) delete mode 100644 solr/core/src/test-files/solr/collection1/conf/bad-schema-docValues-not-required-no-default.xml diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java index edbc37c8c76..871c94cc50e 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java @@ -45,12 +45,13 @@ public class BytesRefFieldSource extends FieldCacheSource { // To be sorted or not to be sorted, that is the question // TODO: do it cleaner? 
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) { + final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field); final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field); return new FunctionValues() { @Override public boolean exists(int doc) { - return true; // doc values are dense + return docsWithField.get(doc); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 8622c4ad9e1..48e23848aea 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -650,6 +650,43 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { ireader.close(); directory.close(); } + + public void testSortedMergeAwayAllValues() throws IOException { + Directory directory = newDirectory(); + Analyzer analyzer = new MockAnalyzer(random()); + IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + iwconfig.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); + + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.NO)); + iwriter.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.NO)); + doc.add(new SortedDocValuesField("field", new BytesRef("hello"))); + iwriter.addDocument(doc); + iwriter.commit(); + iwriter.deleteDocuments(new Term("id", "1")); + iwriter.forceMerge(1); + + DirectoryReader ireader = iwriter.getReader(); + iwriter.close(); + + SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field"); + if (codecSupportsDocsWithField("field")) { + assertEquals(-1, dv.getOrd(0)); + assertEquals(0, dv.getValueCount()); + } else { + assertEquals(0, dv.getOrd(0)); + assertEquals(1, dv.getValueCount()); + BytesRef ref = new BytesRef(); + dv.lookupOrd(0, ref); + assertEquals(new BytesRef(), ref); + } + + ireader.close(); + directory.close(); + } public void testBytesWithNewline() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index bca44a19d76..fb424fdee8b 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -218,12 +218,7 @@ public class DocValuesFacets { static NamedList finalize(NamedList res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException { if (missing) { if (missingCount < 0) { - if (schemaField.multiValued()) { - missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName()); - } else { - // nocommit: support missing count (ord = -1) for single-valued here. 
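// Aside (sketch of the ord-based count the nocommit removed below asked about: now
// that single-valued sorted docvalues can return ord == -1, a missing count can be
// derived straight from the ords; hypothetical helper, not what the patch ships):
static int countMissing(SortedDocValues si, DocIdSetIterator disi) throws IOException {
  int missing = 0;
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (si.getOrd(doc) == -1) {  // -1 now means "document has no value"
      missing++;
    }
  }
  return missing;
}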
- missingCount = 0; // single-valued dv is implicitly 0 - } + missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName()); } res.add(null, missingCount); } @@ -232,12 +227,12 @@ } /** accumulates per-segment single-valued facet counts, mapping to global ordinal space */ - // specialized since the single-valued case is simpler: you don't have to deal with missing count, etc + // specialized since the single-valued case is different static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); - if (map != null) { + if (map != null && term >= 0) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term-startTermIndex; diff --git a/solr/core/src/java/org/apache/solr/request/NumericFacets.java b/solr/core/src/java/org/apache/solr/request/NumericFacets.java index e16e6358ca4..62950e2e7ca 100644 --- a/solr/core/src/java/org/apache/solr/request/NumericFacets.java +++ b/solr/core/src/java/org/apache/solr/request/NumericFacets.java @@ -255,7 +255,7 @@ final class NumericFacets { if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict if (!sf.indexed()) { - throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on a field which is not indexed"); + throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed"); } // Add zeros until there are limit results final Set alreadySeen = new HashSet(); diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 7c59741d7d8..2c9600c67de 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -80,9 +80,6 @@ public class StrField extends PrimitiveFieldType { @Override public void checkSchemaField(SchemaField field) { - if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) { - throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required"); - } } } diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index 99cff2204b5..d0e92f7e427 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -696,9 +696,6 @@ public class TrieField extends PrimitiveFieldType { @Override public void checkSchemaField(final SchemaField field) { - if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) { - throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required"); - } } } diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-docValues-not-required-no-default.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-docValues-not-required-no-default.xml deleted file mode 100644 index deadd9ac68b..00000000000 --- a/solr/core/src/test-files/solr/collection1/conf/bad-schema-docValues-not-required-no-default.xml +++ /dev/null @@ -1,33 +0,0 @@ diff --git
a/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml index e811f91e8a1..0e3116d0797 100755 --- a/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml @@ -26,17 +26,16 @@ diff --git a/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java b/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java index df36b6b593b..b6581af49bf 100644 --- a/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java +++ b/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java @@ -39,7 +39,7 @@ import org.junit.Test; * to the indexed facet results as if it were just another faceting method. */ @Slow -@SuppressCodecs({"Lucene40", "Lucene41"}) +@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) public class TestRandomDVFaceting extends SolrTestCaseJ4 { @BeforeClass @@ -162,6 +162,8 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 { SchemaField sf = req.getSchema().getField(ftype.fname); boolean multiValued = sf.getType().multiValuedFieldCache(); + boolean indexed = sf.indexed(); + boolean numeric = sf.getType().getNumericType() != null; int offset = 0; if (rand.nextInt(100) < 20) { @@ -179,8 +181,21 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 { params.add("facet.limit", Integer.toString(limit)); } - if (rand.nextBoolean()) { - params.add("facet.sort", rand.nextBoolean() ? "index" : "count"); + // the following two situations cannot work for unindexed single-valued numerics: + // (currently none of the dv fields in this test config) + // facet.sort = index + // facet.minCount = 0 + if (!numeric || sf.multiValued()) { + if (rand.nextBoolean()) { + params.add("facet.sort", rand.nextBoolean() ? 
"index" : "count"); + } + + if (rand.nextInt(100) < 10) { + params.add("facet.mincount", Integer.toString(rand.nextInt(5))); + } + } else { + params.add("facet.sort", "count"); + params.add("facet.mincount", Integer.toString(1+rand.nextInt(5))); } if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) { @@ -192,10 +207,6 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 { params.add("facet.prefix", prefix); } - if (rand.nextInt(100) < 10) { - params.add("facet.mincount", Integer.toString(rand.nextInt(5))); - } - if (rand.nextInt(100) < 20) { params.add("facet.missing", "true"); } diff --git a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java index 1a2693e56fd..a4453ddfbc3 100644 --- a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java +++ b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java @@ -93,10 +93,6 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase { doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support"); } - public void testDocValuesNotRequiredNoDefault() throws Exception { - doTest("bad-schema-docValues-not-required-no-default.xml", "has no default value and is not required"); - } - public void testDocValuesUnsupported() throws Exception { doTest("bad-schema-unsupported-docValues.xml", "does not support doc values"); } diff --git a/solr/example/solr/collection1/conf/schema.xml b/solr/example/solr/collection1/conf/schema.xml index 9cdd2976026..75fad489f25 100755 --- a/solr/example/solr/collection1/conf/schema.xml +++ b/solr/example/solr/collection1/conf/schema.xml @@ -168,8 +168,10 @@ + + + + + +Codecs for testing (simulate old disk formats, wacky theoretical use cases, etc) + + diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 48e23848aea..c1902cd93eb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -2538,7 +2538,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { d.close(); } - // nocommit: get this out of here and into the deprecated codecs (4.0, 4.2) + // TODO: get this out of here and into the deprecated codecs (4.0, 4.2) public void testHugeBinaryValueLimit() throws Exception { // We only test DVFormats that have a limit assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field")); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index d79c948ed7e..b1a43a0fa59 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -757,14 +757,13 @@ public class _TestUtil { } } - // nocommit: remove this, push this test to Lucene40/Lucene42 codec tests + // TODO: remove this, push this test to Lucene40/Lucene42 codec tests public static boolean fieldSupportsHugeBinaryDocValues(String field) { String dvFormat = getDocValuesFormat(field); - System.out.println(dvFormat); - return dvFormat.equals("Lucene45") || - dvFormat.equals("Asserting") || - dvFormat.equals("Disk") || - dvFormat.equals("SimpleText"); + if (dvFormat.equals("Lucene40") || 
dvFormat.equals("Lucene42")) { + return false; + } + return true; } public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException { From 28316a161c90216b50d63fcd3ba8084c99b895eb Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 19 Aug 2013 20:11:03 +0000 Subject: [PATCH 10/16] add memorydv with missing support git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515582 13f79535-47bb-0310-9956-ffa450edef68 --- .../memory/MemoryDocValuesConsumer.java | 403 +++++++++++ .../codecs/memory/MemoryDocValuesFormat.java | 72 ++ .../memory/MemoryDocValuesProducer.java | 633 ++++++++++++++++++ .../apache/lucene/codecs/memory/package.html | 2 +- .../org.apache.lucene.codecs.DocValuesFormat | 1 + .../memory/TestMemoryDocValuesFormat.java | 39 ++ .../org/apache/lucene/index/RandomCodec.java | 2 + .../org/apache/lucene/util/_TestUtil.java | 2 +- 8 files changed, 1152 insertions(+), 2 deletions(-) create mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java create mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java create mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java create mode 100644 lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryDocValuesFormat.java diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java new file mode 100644 index 00000000000..2b3b9901fc5 --- /dev/null +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java @@ -0,0 +1,403 @@ +package org.apache.lucene.codecs.memory; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.MathUtil; +import org.apache.lucene.util.fst.Builder; +import org.apache.lucene.util.fst.FST.INPUT_TYPE; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.PositiveIntOutputs; +import org.apache.lucene.util.fst.Util; +import org.apache.lucene.util.packed.BlockPackedWriter; +import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; +import org.apache.lucene.util.packed.PackedInts.FormatAndBits; +import org.apache.lucene.util.packed.PackedInts; + +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.VERSION_CURRENT; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_SIZE; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BYTES; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.NUMBER; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.FST; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.TABLE_COMPRESSED; +import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.UNCOMPRESSED; + +/** + * Writer for {@link MemoryDocValuesFormat} + */ +class MemoryDocValuesConsumer extends DocValuesConsumer { + final IndexOutput data, meta; + final int maxDoc; + final float acceptableOverheadRatio; + + MemoryDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, float acceptableOverheadRatio) throws IOException { + this.acceptableOverheadRatio = acceptableOverheadRatio; + maxDoc = state.segmentInfo.getDocCount(); + boolean success = false; + try { + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.createOutput(dataName, state.context); + CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT); + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + meta = state.directory.createOutput(metaName, state.context); + CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public void addNumericField(FieldInfo field, Iterable values) throws IOException { + addNumericField(field, values, true); + } + + void addNumericField(FieldInfo field, Iterable values, boolean optimizeStorage) throws IOException { + meta.writeVInt(field.number); + meta.writeByte(NUMBER); + meta.writeLong(data.getFilePointer()); + long minValue = Long.MAX_VALUE; + long maxValue = Long.MIN_VALUE; + long gcd = 0; + boolean missing = false; + // TODO: more efficient? 
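+ // (uniqueValues tracks distinct values while scanning; once more than 256 are
+ // seen it is nulled out below and table compression is abandoned for this field)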
+ HashSet uniqueValues = null; + if (optimizeStorage) { + uniqueValues = new HashSet<>(); + + long count = 0; + for (Number nv : values) { + final long v; + if (nv == null) { + v = 0; + missing = true; + } else { + v = nv.longValue(); + } + + if (gcd != 1) { + if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { + // in that case v - minValue might overflow and make the GCD computation return + // wrong results. Since these extreme values are unlikely, we just discard + // GCD computation for them + gcd = 1; + } else if (count != 0) { // minValue needs to be set first + gcd = MathUtil.gcd(gcd, v - minValue); + } + } + + minValue = Math.min(minValue, v); + maxValue = Math.max(maxValue, v); + + if (uniqueValues != null) { + if (uniqueValues.add(v)) { + if (uniqueValues.size() > 256) { + uniqueValues = null; + } + } + } + + ++count; + } + assert count == maxDoc; + } + + if (missing) { + long start = data.getFilePointer(); + writeMissingBitset(values); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + } else { + meta.writeLong(-1L); + } + + if (uniqueValues != null) { + // small number of unique values + final int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1); + FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio); + if (formatAndBits.bitsPerValue == 8 && minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) { + meta.writeByte(UNCOMPRESSED); // uncompressed + for (Number nv : values) { + data.writeByte(nv == null ? 0 : (byte) nv.longValue()); + } + } else { + meta.writeByte(TABLE_COMPRESSED); // table-compressed + Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]); + final HashMap encode = new HashMap(); + data.writeVInt(decode.length); + for (int i = 0; i < decode.length; i++) { + data.writeLong(decode[i]); + encode.put(decode[i], i); + } + + meta.writeVInt(PackedInts.VERSION_CURRENT); + data.writeVInt(formatAndBits.format.getId()); + data.writeVInt(formatAndBits.bitsPerValue); + + final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE); + for(Number nv : values) { + writer.add(encode.get(nv == null ? 0 : nv.longValue())); + } + writer.finish(); + } + } else if (gcd != 0 && gcd != 1) { + meta.writeByte(GCD_COMPRESSED); + meta.writeVInt(PackedInts.VERSION_CURRENT); + data.writeLong(minValue); + data.writeLong(gcd); + data.writeVInt(BLOCK_SIZE); + + final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); + for (Number nv : values) { + long value = nv == null ? 0 : nv.longValue(); + writer.add((value - minValue) / gcd); + } + writer.finish(); + } else { + meta.writeByte(DELTA_COMPRESSED); // delta-compressed + + meta.writeVInt(PackedInts.VERSION_CURRENT); + data.writeVInt(BLOCK_SIZE); + + final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE); + for (Number nv : values) { + writer.add(nv == null ? 
0 : nv.longValue()); + } + writer.finish(); + } + } + + @Override + public void close() throws IOException { + boolean success = false; + try { + if (meta != null) { + meta.writeVInt(-1); // write EOF marker + } + success = true; + } finally { + if (success) { + IOUtils.close(data, meta); + } else { + IOUtils.closeWhileHandlingException(data, meta); + } + } + } + + @Override + public void addBinaryField(FieldInfo field, final Iterable values) throws IOException { + // write the byte[] data + meta.writeVInt(field.number); + meta.writeByte(BYTES); + int minLength = Integer.MAX_VALUE; + int maxLength = Integer.MIN_VALUE; + final long startFP = data.getFilePointer(); + boolean missing = false; + for(BytesRef v : values) { + final int length; + if (v == null) { + length = 0; + missing = true; + } else { + length = v.length; + } + if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH) { + throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH); + } + minLength = Math.min(minLength, length); + maxLength = Math.max(maxLength, length); + if (v != null) { + data.writeBytes(v.bytes, v.offset, v.length); + } + } + meta.writeLong(startFP); + meta.writeLong(data.getFilePointer() - startFP); + if (missing) { + long start = data.getFilePointer(); + writeMissingBitset(values); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + } else { + meta.writeLong(-1L); + } + meta.writeVInt(minLength); + meta.writeVInt(maxLength); + + // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit) + // otherwise, we need to record the length fields... + if (minLength != maxLength) { + meta.writeVInt(PackedInts.VERSION_CURRENT); + meta.writeVInt(BLOCK_SIZE); + + final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE); + long addr = 0; + for (BytesRef v : values) { + if (v != null) { + addr += v.length; + } + writer.add(addr); + } + writer.finish(); + } + } + + private void writeFST(FieldInfo field, Iterable values) throws IOException { + meta.writeVInt(field.number); + meta.writeByte(FST); + meta.writeLong(data.getFilePointer()); + PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); + Builder builder = new Builder(INPUT_TYPE.BYTE1, outputs); + IntsRef scratch = new IntsRef(); + long ord = 0; + for (BytesRef v : values) { + builder.add(Util.toIntsRef(v, scratch), ord); + ord++; + } + FST fst = builder.finish(); + if (fst != null) { + fst.save(data); + } + meta.writeVLong(ord); + } + + // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on, + // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode) + void writeMissingBitset(Iterable values) throws IOException { + long bits = 0; + int count = 0; + for (Object v : values) { + if (count == 64) { + data.writeLong(bits); + count = 0; + bits = 0; + } + if (v != null) { + bits |= 1L << (count & 0x3f); + } + count++; + } + if (count > 0) { + data.writeLong(bits); + } + } + + @Override + public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + // write the ordinals as numerics + addNumericField(field, docToOrd, false); + + // write the values as FST + writeFST(field, values); + } + + // note: this might not be the most efficient... 
but its fairly simple + @Override + public void addSortedSetField(FieldInfo field, Iterable values, final Iterable docToOrdCount, final Iterable ords) throws IOException { + // write the ordinals as a binary field + addBinaryField(field, new Iterable() { + @Override + public Iterator iterator() { + return new SortedSetIterator(docToOrdCount.iterator(), ords.iterator()); + } + }); + + // write the values as FST + writeFST(field, values); + } + + // per-document vint-encoded byte[] + static class SortedSetIterator implements Iterator { + byte[] buffer = new byte[10]; + ByteArrayDataOutput out = new ByteArrayDataOutput(); + BytesRef ref = new BytesRef(); + + final Iterator counts; + final Iterator ords; + + SortedSetIterator(Iterator counts, Iterator ords) { + this.counts = counts; + this.ords = ords; + } + + @Override + public boolean hasNext() { + return counts.hasNext(); + } + + @Override + public BytesRef next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + int count = counts.next().intValue(); + int maxSize = count*9; // worst case + if (maxSize > buffer.length) { + buffer = ArrayUtil.grow(buffer, maxSize); + } + + try { + encodeValues(count); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + + ref.bytes = buffer; + ref.offset = 0; + ref.length = out.getPosition(); + + return ref; + } + + // encodes count values to buffer + private void encodeValues(int count) throws IOException { + out.reset(buffer); + long lastOrd = 0; + for (int i = 0; i < count; i++) { + long ord = ords.next().longValue(); + out.writeVLong(ord - lastOrd); + lastOrd = ord; + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java new file mode 100644 index 00000000000..2f6216db08b --- /dev/null +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java @@ -0,0 +1,72 @@ +package org.apache.lucene.codecs.memory; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.util.packed.PackedInts; + +/** In-memory docvalues format */ +public class MemoryDocValuesFormat extends DocValuesFormat { + + /** Maximum length for each binary doc values field. 
*/ + public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2; + + final float acceptableOverheadRatio; + + /** + * Calls {@link #MemoryDocValuesFormat(float) + * MemoryDocValuesFormat(PackedInts.DEFAULT)} + */ + public MemoryDocValuesFormat() { + this(PackedInts.DEFAULT); + } + + /** + * Creates a new MemoryDocValuesFormat with the specified + * acceptableOverheadRatio for NumericDocValues. + * @param acceptableOverheadRatio compression parameter for numerics. + * Currently this is only used when the number of unique values is small. + * + * @lucene.experimental + */ + public MemoryDocValuesFormat(float acceptableOverheadRatio) { + super("Memory"); + this.acceptableOverheadRatio = acceptableOverheadRatio; + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new MemoryDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return new MemoryDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); + } + + static final String DATA_CODEC = "MemoryDocValuesData"; + static final String DATA_EXTENSION = "mdvd"; + static final String METADATA_CODEC = "MemoryDocValuesMetadata"; + static final String METADATA_EXTENSION = "mdvm"; +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java new file mode 100644 index 00000000000..46ed8b8e9b9 --- /dev/null +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java @@ -0,0 +1,633 @@ +package org.apache.lucene.codecs.memory; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.fst.BytesRefFSTEnum; +import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.FST.Arc; +import org.apache.lucene.util.fst.FST.BytesReader; +import org.apache.lucene.util.fst.PositiveIntOutputs; +import org.apache.lucene.util.fst.Util; +import org.apache.lucene.util.packed.BlockPackedReader; +import org.apache.lucene.util.packed.MonotonicBlockPackedReader; +import org.apache.lucene.util.packed.PackedInts; + +/** + * Reader for {@link MemoryDocValuesFormat} + */ +class MemoryDocValuesProducer extends DocValuesProducer { + // metadata maps (just file pointers and minimal stuff) + private final Map numerics; + private final Map binaries; + private final Map fsts; + private final IndexInput data; + + // ram instances we have already loaded + private final Map numericInstances = + new HashMap(); + private final Map binaryInstances = + new HashMap(); + private final Map> fstInstances = + new HashMap>(); + private final Map docsWithFieldInstances = new HashMap(); + + private final int maxDoc; + + + static final byte NUMBER = 0; + static final byte BYTES = 1; + static final byte FST = 2; + + static final int BLOCK_SIZE = 4096; + + static final byte DELTA_COMPRESSED = 0; + static final byte TABLE_COMPRESSED = 1; + static final byte UNCOMPRESSED = 2; + static final byte GCD_COMPRESSED = 3; + + static final int VERSION_START = 0; + static final int VERSION_GCD_COMPRESSION = 1; + static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; + + MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { + maxDoc = state.segmentInfo.getDocCount(); + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + // read in the entries from the metadata file. 
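+ // (entries are (field number, type byte, per-type metadata) tuples; the writer's
+ // close() appended a vInt -1 as the end-of-fields marker that readFields() loops until)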
+ IndexInput in = state.directory.openInput(metaName, state.context); + boolean success = false; + final int version; + try { + version = CodecUtil.checkHeader(in, metaCodec, + VERSION_START, + VERSION_CURRENT); + numerics = new HashMap(); + binaries = new HashMap(); + fsts = new HashMap(); + readFields(in, state.fieldInfos); + + success = true; + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + + success = false; + try { + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.openInput(dataName, state.context); + final int version2 = CodecUtil.checkHeader(data, dataCodec, + VERSION_START, + VERSION_CURRENT); + if (version != version2) { + throw new CorruptIndexException("Format versions mismatch"); + } + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this.data); + } + } + } + + private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + int fieldNumber = meta.readVInt(); + while (fieldNumber != -1) { + int fieldType = meta.readByte(); + if (fieldType == NUMBER) { + NumericEntry entry = new NumericEntry(); + entry.offset = meta.readLong(); + entry.missingOffset = meta.readLong(); + if (entry.missingOffset != -1) { + entry.missingBytes = meta.readLong(); + } else { + entry.missingBytes = 0; + } + entry.format = meta.readByte(); + switch(entry.format) { + case DELTA_COMPRESSED: + case TABLE_COMPRESSED: + case GCD_COMPRESSED: + case UNCOMPRESSED: + break; + default: + throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); + } + if (entry.format != UNCOMPRESSED) { + entry.packedIntsVersion = meta.readVInt(); + } + numerics.put(fieldNumber, entry); + } else if (fieldType == BYTES) { + BinaryEntry entry = new BinaryEntry(); + entry.offset = meta.readLong(); + entry.numBytes = meta.readLong(); + entry.missingOffset = meta.readLong(); + if (entry.missingOffset != -1) { + entry.missingBytes = meta.readLong(); + } else { + entry.missingBytes = 0; + } + entry.minLength = meta.readVInt(); + entry.maxLength = meta.readVInt(); + if (entry.minLength != entry.maxLength) { + entry.packedIntsVersion = meta.readVInt(); + entry.blockSize = meta.readVInt(); + } + binaries.put(fieldNumber, entry); + } else if (fieldType == FST) { + FSTEntry entry = new FSTEntry(); + entry.offset = meta.readLong(); + entry.numOrds = meta.readVLong(); + fsts.put(fieldNumber, entry); + } else { + throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta); + } + fieldNumber = meta.readVInt(); + } + } + + @Override + public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException { + NumericDocValues instance = numericInstances.get(field.number); + if (instance == null) { + instance = loadNumeric(field); + numericInstances.put(field.number, instance); + } + return instance; + } + + private NumericDocValues loadNumeric(FieldInfo field) throws IOException { + NumericEntry entry = numerics.get(field.number); + data.seek(entry.offset + entry.missingBytes); + switch (entry.format) { + case TABLE_COMPRESSED: + int size = data.readVInt(); + if (size > 256) { + throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data); + } + final long decode[] = new long[size]; + for (int i = 0; i < decode.length; i++) { + decode[i] = data.readLong(); + } + final int formatID = data.readVInt(); + final int bitsPerValue = 
data.readVInt(); + final PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), entry.packedIntsVersion, maxDoc, bitsPerValue); + return new NumericDocValues() { + @Override + public long get(int docID) { + return decode[(int)ordsReader.get(docID)]; + } + }; + case DELTA_COMPRESSED: + final int blockSize = data.readVInt(); + final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false); + return new NumericDocValues() { + @Override + public long get(int docID) { + return reader.get(docID); + } + }; + case UNCOMPRESSED: + final byte bytes[] = new byte[maxDoc]; + data.readBytes(bytes, 0, bytes.length); + return new NumericDocValues() { + @Override + public long get(int docID) { + return bytes[docID]; + } + }; + case GCD_COMPRESSED: + final long min = data.readLong(); + final long mult = data.readLong(); + final int quotientBlockSize = data.readVInt(); + final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false); + return new NumericDocValues() { + @Override + public long get(int docID) { + return min + mult * quotientReader.get(docID); + } + }; + default: + throw new AssertionError(); + } + } + + @Override + public synchronized BinaryDocValues getBinary(FieldInfo field) throws IOException { + BinaryDocValues instance = binaryInstances.get(field.number); + if (instance == null) { + instance = loadBinary(field); + binaryInstances.put(field.number, instance); + } + return instance; + } + + private BinaryDocValues loadBinary(FieldInfo field) throws IOException { + BinaryEntry entry = binaries.get(field.number); + data.seek(entry.offset); + PagedBytes bytes = new PagedBytes(16); + bytes.copy(data, entry.numBytes); + final PagedBytes.Reader bytesReader = bytes.freeze(true); + if (entry.minLength == entry.maxLength) { + final int fixedLength = entry.minLength; + return new BinaryDocValues() { + @Override + public void get(int docID, BytesRef result) { + bytesReader.fillSlice(result, fixedLength * (long)docID, fixedLength); + } + }; + } else { + data.seek(data.getFilePointer() + entry.missingBytes); + final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false); + return new BinaryDocValues() { + @Override + public void get(int docID, BytesRef result) { + long startAddress = docID == 0 ? 
0 : addresses.get(docID-1); + long endAddress = addresses.get(docID); + bytesReader.fillSlice(result, startAddress, (int) (endAddress - startAddress)); + } + }; + } + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + final FSTEntry entry = fsts.get(field.number); + if (entry.numOrds == 0) { + return SortedDocValues.EMPTY; + } + FST instance; + synchronized(this) { + instance = fstInstances.get(field.number); + if (instance == null) { + data.seek(entry.offset); + instance = new FST(data, PositiveIntOutputs.getSingleton()); + fstInstances.put(field.number, instance); + } + } + final NumericDocValues docToOrd = getNumeric(field); + final FST fst = instance; + + // per-thread resources + final BytesReader in = fst.getBytesReader(); + final Arc firstArc = new Arc(); + final Arc scratchArc = new Arc(); + final IntsRef scratchInts = new IntsRef(); + final BytesRefFSTEnum fstEnum = new BytesRefFSTEnum(fst); + + return new SortedDocValues() { + @Override + public int getOrd(int docID) { + return (int) docToOrd.get(docID); + } + + @Override + public void lookupOrd(int ord, BytesRef result) { + try { + in.setPosition(0); + fst.getFirstArc(firstArc); + IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts); + result.bytes = new byte[output.length]; + result.offset = 0; + result.length = 0; + Util.toBytesRef(output, result); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + } + + @Override + public int lookupTerm(BytesRef key) { + try { + InputOutput o = fstEnum.seekCeil(key); + if (o == null) { + return -getValueCount()-1; + } else if (o.input.equals(key)) { + return o.output.intValue(); + } else { + return (int) -o.output-1; + } + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + } + + @Override + public int getValueCount() { + return (int)entry.numOrds; + } + + @Override + public TermsEnum termsEnum() { + return new FSTTermsEnum(fst); + } + }; + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + final FSTEntry entry = fsts.get(field.number); + if (entry.numOrds == 0) { + return SortedSetDocValues.EMPTY; // empty FST! 
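+ // (writeFST() only calls fst.save() when Builder.finish() returned a non-null FST,
+ // so a field with numOrds == 0 has no FST bytes on disk to load)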
+ } + FST instance; + synchronized(this) { + instance = fstInstances.get(field.number); + if (instance == null) { + data.seek(entry.offset); + instance = new FST(data, PositiveIntOutputs.getSingleton()); + fstInstances.put(field.number, instance); + } + } + final BinaryDocValues docToOrds = getBinary(field); + final FST fst = instance; + + // per-thread resources + final BytesReader in = fst.getBytesReader(); + final Arc firstArc = new Arc(); + final Arc scratchArc = new Arc(); + final IntsRef scratchInts = new IntsRef(); + final BytesRefFSTEnum fstEnum = new BytesRefFSTEnum(fst); + final BytesRef ref = new BytesRef(); + final ByteArrayDataInput input = new ByteArrayDataInput(); + return new SortedSetDocValues() { + long currentOrd; + + @Override + public long nextOrd() { + if (input.eof()) { + return NO_MORE_ORDS; + } else { + currentOrd += input.readVLong(); + return currentOrd; + } + } + + @Override + public void setDocument(int docID) { + docToOrds.get(docID, ref); + input.reset(ref.bytes, ref.offset, ref.length); + currentOrd = 0; + } + + @Override + public void lookupOrd(long ord, BytesRef result) { + try { + in.setPosition(0); + fst.getFirstArc(firstArc); + IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts); + result.bytes = new byte[output.length]; + result.offset = 0; + result.length = 0; + Util.toBytesRef(output, result); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + } + + @Override + public long lookupTerm(BytesRef key) { + try { + InputOutput o = fstEnum.seekCeil(key); + if (o == null) { + return -getValueCount()-1; + } else if (o.input.equals(key)) { + return o.output.intValue(); + } else { + return -o.output-1; + } + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + } + + @Override + public long getValueCount() { + return entry.numOrds; + } + + @Override + public TermsEnum termsEnum() { + return new FSTTermsEnum(fst); + } + }; + } + + private Bits getMissingBits(int fieldNumber, final long offset, final long length) throws IOException { + if (offset == -1) { + return new Bits.MatchAllBits(maxDoc); + } else { + Bits instance; + synchronized(this) { + instance = docsWithFieldInstances.get(fieldNumber); + if (instance == null) { + IndexInput data = this.data.clone(); + data.seek(offset); + assert length % 8 == 0; + long bits[] = new long[(int) length >> 3]; + for (int i = 0; i < bits.length; i++) { + bits[i] = data.readLong(); + } + instance = new FixedBitSet(bits, maxDoc); + docsWithFieldInstances.put(fieldNumber, instance); + } + } + return instance; + } + } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + switch(field.getDocValuesType()) { + case SORTED_SET: + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + case SORTED: + return new SortedDocsWithField(getSorted(field), maxDoc); + case BINARY: + BinaryEntry be = binaries.get(field.number); + return getMissingBits(field.number, be.missingOffset, be.missingBytes); + case NUMERIC: + NumericEntry ne = numerics.get(field.number); + return getMissingBits(field.number, ne.missingOffset, ne.missingBytes); + default: + throw new AssertionError(); + } + } + + @Override + public void close() throws IOException { + data.close(); + } + + static class NumericEntry { + long offset; + long missingOffset; + long missingBytes; + byte format; + int packedIntsVersion; + } + + static class BinaryEntry { + long offset; + long missingOffset; + long missingBytes; + long numBytes; + int minLength; + int maxLength; 
+ int packedIntsVersion; + int blockSize; + } + + static class FSTEntry { + long offset; + long numOrds; + } + + // exposes FSTEnum directly as a TermsEnum: avoids binary-search next() + static class FSTTermsEnum extends TermsEnum { + final BytesRefFSTEnum<Long> in; + + // this is all for the complicated seek(ord)... + // maybe we should add a FSTEnum that supports this operation? + final FST<Long> fst; + final FST.BytesReader bytesReader; + final Arc<Long> firstArc = new Arc<Long>(); + final Arc<Long> scratchArc = new Arc<Long>(); + final IntsRef scratchInts = new IntsRef(); + final BytesRef scratchBytes = new BytesRef(); + + FSTTermsEnum(FST<Long> fst) { + this.fst = fst; + in = new BytesRefFSTEnum<Long>(fst); + bytesReader = fst.getBytesReader(); + } + + @Override + public BytesRef next() throws IOException { + InputOutput<Long> io = in.next(); + if (io == null) { + return null; + } else { + return io.input; + } + } + + @Override + public Comparator<BytesRef> getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + + @Override + public SeekStatus seekCeil(BytesRef text) throws IOException { + if (in.seekCeil(text) == null) { + return SeekStatus.END; + } else if (term().equals(text)) { + // TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729 + // to remove this comparison? + return SeekStatus.FOUND; + } else { + return SeekStatus.NOT_FOUND; + } + } + + @Override + public boolean seekExact(BytesRef text) throws IOException { + if (in.seekExact(text) == null) { + return false; + } else { + return true; + } + } + + @Override + public void seekExact(long ord) throws IOException { + // TODO: would be better to make this simpler and faster. + // but we don't want to introduce a bug that corrupts our enum state! + bytesReader.setPosition(0); + fst.getFirstArc(firstArc); + IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts); + scratchBytes.bytes = new byte[output.length]; + scratchBytes.offset = 0; + scratchBytes.length = 0; + Util.toBytesRef(output, scratchBytes); + // TODO: we could do this lazily, better to try to push into FSTEnum though? + in.seekExact(scratchBytes); + } + + @Override + public BytesRef term() throws IOException { + return in.current().input; + } + + @Override + public long ord() throws IOException { + return in.current().output; + } + + @Override + public int docFreq() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long totalTermFreq() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html index 340e8316908..468cc07257e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html @@ -20,6 +20,6 @@ -Postings format that is read entirely into memory. +Postings and DocValues formats that are read entirely into memory.
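The missing-docs bitset is the one genuinely new on-disk structure in this series: writeMissingBitset() in the consumer packs one bit per document into longs (bit set = document has a value), and getMissingBits() in the producer reads the same longs back and wraps them in a FixedBitSet. A self-contained round trip of that layout, with plain arrays standing in for the IndexOutput/IndexInput used above:

    // encode: bit (doc % 64) of word (doc / 64) is set iff the document has a value,
    // the same layout writeMissingBitset() streams out, one long per 64 documents
    static long[] encodeDocsWithField(boolean[] hasValue) {
      long[] words = new long[(hasValue.length + 63) / 64];
      for (int doc = 0; doc < hasValue.length; doc++) {
        if (hasValue[doc]) {
          words[doc >> 6] |= 1L << (doc & 0x3f);
        }
      }
      return words;
    }

    // decode: getMissingBits() builds new FixedBitSet(words, maxDoc);
    // this is the bit test FixedBitSet.get() performs
    static boolean docHasValue(long[] words, int doc) {
      return (words[doc >> 6] & (1L << (doc & 0x3f))) != 0;
    }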
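addNumericField() above chooses among four layouts: UNCOMPRESSED single bytes, TABLE_COMPRESSED ords into a table of at most 256 distinct values, GCD_COMPRESSED, and DELTA_COMPRESSED as the fallback. The GCD case is the least obvious: only quotients go to disk, and loadNumeric()'s GCD_COMPRESSED branch reconstructs each value as min + gcd * quotient. The arithmetic in isolation:

    // encode side: store q = (v - min) / gcd; missing docs were already mapped to v = 0
    static long[] gcdEncode(long[] values, long min, long gcd) {
      long[] quotients = new long[values.length];
      for (int i = 0; i < values.length; i++) {
        quotients[i] = (values[i] - min) / gcd;
      }
      return quotients;
    }

    // decode side: matches "return min + mult * quotientReader.get(docID)" above
    static long gcdDecode(long min, long gcd, long quotient) {
      return min + gcd * quotient;
    }

    // e.g. timestamps rounded to whole seconds: values of the form min + n * 1000
    // store only the small quotients n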
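Sorted-set support piggybacks on the binary machinery: SortedSetIterator above turns each document's ascending ords into a delta-vLong byte[], and the producer's setDocument()/nextOrd() pair replays it. A sketch of the encoding in isolation, using the same ByteArrayDataOutput/ByteArrayDataInput classes the codec itself uses (the ord count is carried separately here, as the docToOrdCount iterable is in the consumer):

    import org.apache.lucene.store.ByteArrayDataInput;
    import org.apache.lucene.store.ByteArrayDataOutput;

    // encode one document's ascending ords as vLong deltas (cf. SortedSetIterator.encodeValues)
    static int encodeOrds(long[] ords, byte[] buffer) throws IOException {
      ByteArrayDataOutput out = new ByteArrayDataOutput();
      out.reset(buffer);
      long lastOrd = 0;
      for (long ord : ords) {
        out.writeVLong(ord - lastOrd);   // deltas keep each vLong to a byte or two
        lastOrd = ord;
      }
      return out.getPosition();          // becomes the per-document BytesRef length
    }

    // decode: mirrors setDocument() resetting the input and nextOrd() accumulating deltas
    static long[] decodeOrds(byte[] buffer, int length, int count) {
      ByteArrayDataInput in = new ByteArrayDataInput(buffer, 0, length);
      long[] ords = new long[count];
      long currentOrd = 0;
      for (int i = 0; i < count; i++) {
        currentOrd += in.readVLong();
        ords[i] = currentOrd;
      }
      return ords;
    }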
\ No newline at end of file diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index e289c4d0966..5103c529355 100644 --- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -14,4 +14,5 @@ # limitations under the License. org.apache.lucene.codecs.diskdv.DiskDocValuesFormat +org.apache.lucene.codecs.memory.MemoryDocValuesFormat org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryDocValuesFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryDocValuesFormat.java new file mode 100644 index 00000000000..77c6ea582a8 --- /dev/null +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryDocValuesFormat.java @@ -0,0 +1,39 @@ +package org.apache.lucene.codecs.memory; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Tests MemoryDocValuesFormat + */ +public class TestMemoryDocValuesFormat extends BaseCompressingDocValuesFormatTestCase { + private final Codec codec = _TestUtil.alwaysDocValuesFormat(new MemoryDocValuesFormat()); + + @Override + protected Codec getCodec() { + return codec; + } + + @Override + protected boolean codecAcceptsHugeBinaryValues(String field) { + return false; + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index 239be0f34c7..3530a31ce21 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -40,6 +40,7 @@ import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat; import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings; import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; import org.apache.lucene.codecs.memory.DirectPostingsFormat; +import org.apache.lucene.codecs.memory.MemoryDocValuesFormat; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; @@ -149,6 +150,7 @@ public class RandomCodec extends Lucene45Codec { addDocValues(avoidCodecs, new Lucene45DocValuesFormat(), new DiskDocValuesFormat(), + new MemoryDocValuesFormat(), new SimpleTextDocValuesFormat(), new AssertingDocValuesFormat()); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index b1a43a0fa59..19c16bcd9bc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -760,7 +760,7 @@ public class _TestUtil { // TODO: remove this, push this test to Lucene40/Lucene42 codec tests public static boolean fieldSupportsHugeBinaryDocValues(String field) { String dvFormat = getDocValuesFormat(field); - if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42")) { + if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42") || dvFormat.equals("Memory")) { return false; } return true; From 863c2191226ce9a791ee693795e0d211f9d7419d Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 19 Aug 2013 21:08:42 +0000 Subject: [PATCH 11/16] add checks git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515605 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/AssertingAtomicReader.java | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java index eb6b20df5eb..1bdb14cf574 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java @@ -608,6 +608,32 @@ public class AssertingAtomicReader extends FilterAtomicReader { } } + @Override + public Bits getLiveDocs() { + Bits liveDocs = super.getLiveDocs(); + if (liveDocs != null) { + assert maxDoc() == liveDocs.length(); + } else { + assert maxDoc() == numDocs(); + assert 
!hasDeletions(); + } + return liveDocs; + } + + @Override + public Bits getDocsWithField(String field) throws IOException { + Bits docsWithField = super.getDocsWithField(field); + FieldInfo fi = getFieldInfos().fieldInfo(field); + if (docsWithField != null) { + assert fi != null; + assert fi.hasDocValues(); + assert maxDoc() == docsWithField.length(); + } else { + assert fi == null || fi.hasDocValues() == false; + } + return docsWithField; + } + // this is the same hack as FCInvisible @Override public Object getCoreCacheKey() { From bd5ca555fea6af0ab4a59ec4587137265bccc1b1 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 19 Aug 2013 21:27:45 +0000 Subject: [PATCH 12/16] improve tests git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515613 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/BaseDocValuesFormatTestCase.java | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index c1902cd93eb..8dfaf0cec3f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -1232,6 +1232,73 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { dir.close(); } + private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception { + doTestMissingVsFieldCache(new LongProducer() { + @Override + long next() { + return _TestUtil.nextLong(random(), minValue, maxValue); + } + }); + } + + private void doTestMissingVsFieldCache(LongProducer longs) throws Exception { + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv")); + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); + Field idField = new StringField("id", "", Field.Store.NO); + Field indexedField = newStringField("indexed", "", Field.Store.NO); + Field dvField = new NumericDocValuesField("dv", 0); + + + // index some docs + int numDocs = atLeast(300); + // numDocs should be always > 256 so that in case of a codec that optimizes + // for numbers of values <= 256, all storage layouts are tested + assert numDocs > 256; + for (int i = 0; i < numDocs; i++) { + idField.setStringValue(Integer.toString(i)); + long value = longs.next(); + indexedField.setStringValue(Long.toString(value)); + dvField.setLongValue(value); + Document doc = new Document(); + doc.add(idField); + // 1/4 of the time we neglect to add the fields + if (random().nextInt(4) > 0) { + doc.add(indexedField); + doc.add(dvField); + } + writer.addDocument(doc); + if (random().nextInt(31) == 0) { + writer.commit(); + } + } + + // delete some docs + int numDeletions = random().nextInt(numDocs/10); + for (int i = 0; i < numDeletions; i++) { + int id = random().nextInt(numDocs); + writer.deleteDocuments(new Term("id", Integer.toString(id))); + } + + // merge some segments and ensure that at least one of them has more than + // 256 values + writer.forceMerge(numDocs / 256); + + writer.close(); + + // compare + DirectoryReader ir = DirectoryReader.open(dir); + for (AtomicReaderContext context : ir.leaves()) { + AtomicReader r = context.reader(); + Bits expected = FieldCache.DEFAULT.getDocsWithField(r, 
"indexed"); + Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv"); + assertEquals(expected, actual); + } + ir.close(); + dir.close(); + } + public void testBooleanNumericsVsStoredFields() throws Exception { int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { @@ -1246,6 +1313,13 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } } + public void testByteMissingVsFieldCache() throws Exception { + int numIterations = atLeast(1); + for (int i = 0; i < numIterations; i++) { + doTestMissingVsFieldCache(Byte.MIN_VALUE, Byte.MAX_VALUE); + } + } + public void testShortNumericsVsStoredFields() throws Exception { int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { @@ -1253,6 +1327,13 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } } + public void testShortMissingVsFieldCache() throws Exception { + int numIterations = atLeast(1); + for (int i = 0; i < numIterations; i++) { + doTestMissingVsFieldCache(Short.MIN_VALUE, Short.MAX_VALUE); + } + } + public void testIntNumericsVsStoredFields() throws Exception { int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { @@ -1260,6 +1341,13 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } } + public void testIntMissingVsFieldCache() throws Exception { + int numIterations = atLeast(1); + for (int i = 0; i < numIterations; i++) { + doTestMissingVsFieldCache(Integer.MIN_VALUE, Integer.MAX_VALUE); + } + } + public void testLongNumericsVsStoredFields() throws Exception { int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { @@ -1267,6 +1355,13 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } } + public void testLongMissingVsFieldCache() throws Exception { + int numIterations = atLeast(1); + for (int i = 0; i < numIterations; i++) { + doTestMissingVsFieldCache(Long.MIN_VALUE, Long.MAX_VALUE); + } + } + private void doTestBinaryVsStoredFields(int minLength, int maxLength) throws Exception { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); @@ -2014,6 +2109,13 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { doTestSortedSetVsStoredFields(1, 10); } } + + private void assertEquals(Bits expected, Bits actual) throws Exception { + assertEquals(expected.length(), actual.length()); + for (int i = 0; i < expected.length(); i++) { + assertEquals(expected.get(i), actual.get(i)); + } + } private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception { assertEquals(maxDoc, new SingletonSortedSetDocValues(expected), new SingletonSortedSetDocValues(actual)); From cf0078a6861051d9cf46d1981b360ac7520e28b1 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 20 Aug 2013 02:05:12 +0000 Subject: [PATCH 13/16] beef up tests git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515686 13f79535-47bb-0310-9956-ffa450edef68 --- .../simpletext/SimpleTextDocValuesReader.java | 22 +- .../asserting/AssertingDocValuesFormat.java | 88 ++++-- .../asserting/AssertingNormsFormat.java | 4 +- .../lucene/index/AssertingAtomicReader.java | 22 ++ .../index/BaseDocValuesFormatTestCase.java | 258 +++++++++++++++++- .../apache/lucene/util/LuceneTestCase.java | 8 +- 6 files changed, 352 insertions(+), 50 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java 
b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index 9ead984bb84..3753a62193c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -28,7 +28,6 @@ import java.util.Locale; import java.util.Map; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.DocValuesProducer.SortedSetDocsWithField; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; @@ -431,16 +430,17 @@ class SimpleTextDocValuesReader extends DocValuesProducer { @Override public Bits getDocsWithField(FieldInfo field) throws IOException { - if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) { - return new SortedSetDocsWithField(getSortedSet(field), maxDoc); - } else if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED) { - return new SortedDocsWithField(getSorted(field), maxDoc); - } else if (field.getDocValuesType() == FieldInfo.DocValuesType.BINARY) { - return getBinaryDocsWithField(field); - } else if (field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC) { - return getNumericDocsWithField(field); - } else { - return new Bits.MatchAllBits(maxDoc); + switch (field.getDocValuesType()) { + case SORTED_SET: + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + case SORTED: + return new SortedDocsWithField(getSorted(field), maxDoc); + case BINARY: + return getBinaryDocsWithField(field); + case NUMERIC: + return getNumericDocsWithField(field); + default: + throw new AssertionError(); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java index 298d7aaf011..f3525a43189 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java @@ -175,28 +175,6 @@ public class AssertingDocValuesFormat extends DocValuesFormat { checkIterator(ords.iterator(), ordCount, false); in.addSortedSetField(field, values, docToOrdCount, ords); } - - private void checkIterator(Iterator iterator, long expectedSize, boolean allowNull) { - for (long i = 0; i < expectedSize; i++) { - boolean hasNext = iterator.hasNext(); - assert hasNext; - T v = iterator.next(); - assert allowNull || v != null; - try { - iterator.remove(); - throw new AssertionError("broken iterator (supports remove): " + iterator); - } catch (UnsupportedOperationException expected) { - // ok - } - } - assert !iterator.hasNext(); - try { - iterator.next(); - throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator); - } catch (NoSuchElementException expected) { - // ok - } - } @Override public void close() throws IOException { @@ -204,6 +182,70 @@ public class AssertingDocValuesFormat extends DocValuesFormat { } } + static class AssertingNormsConsumer extends DocValuesConsumer { + private final DocValuesConsumer in; + private final int maxDoc; + + AssertingNormsConsumer(DocValuesConsumer in, int maxDoc) { + this.in = in; + this.maxDoc = maxDoc; + } + + @Override + public void addNumericField(FieldInfo field, Iterable values) throws IOException { + int count = 0; + for (Number v : values) { + assert v != null; + 
count++; + } + assert count == maxDoc; + checkIterator(values.iterator(), maxDoc, false); + in.addNumericField(field, values); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public void addBinaryField(FieldInfo field, Iterable values) throws IOException { + throw new IllegalStateException(); + } + + @Override + public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + throw new IllegalStateException(); + } + + @Override + public void addSortedSetField(FieldInfo field, Iterable values, Iterable docToOrdCount, Iterable ords) throws IOException { + throw new IllegalStateException(); + } + } + + private static void checkIterator(Iterator iterator, long expectedSize, boolean allowNull) { + for (long i = 0; i < expectedSize; i++) { + boolean hasNext = iterator.hasNext(); + assert hasNext; + T v = iterator.next(); + assert allowNull || v != null; + try { + iterator.remove(); + throw new AssertionError("broken iterator (supports remove): " + iterator); + } catch (UnsupportedOperationException expected) { + // ok + } + } + assert !iterator.hasNext(); + try { + iterator.next(); + throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator); + } catch (NoSuchElementException expected) { + // ok + } + } + static class AssertingDocValuesProducer extends DocValuesProducer { private final DocValuesProducer in; private final int maxDoc; @@ -252,7 +294,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { Bits bits = in.getDocsWithField(field); assert bits != null; assert bits.length() == maxDoc; - return bits; // TODO: add AssertingBits w/ bounds check + return new AssertingAtomicReader.AssertingBits(bits); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java index 8b64401b452..b7662674133 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesConsumer; +import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingNormsConsumer; import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesProducer; import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; import org.apache.lucene.index.SegmentReadState; @@ -38,7 +38,7 @@ public class AssertingNormsFormat extends NormsFormat { public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { DocValuesConsumer consumer = in.normsConsumer(state); assert consumer != null; - return new AssertingDocValuesConsumer(consumer, state.segmentInfo.getDocCount()); + return new AssertingNormsConsumer(consumer, state.segmentInfo.getDocCount()); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java index 1bdb14cf574..086cb21fb89 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java @@ -607,12 +607,33 @@ public class AssertingAtomicReader extends FilterAtomicReader { return null; } } + + /** Wraps a Bits but with additional asserts */ + public static class AssertingBits implements Bits { + final Bits in; + + public AssertingBits(Bits in) { + this.in = in; + } + + @Override + public boolean get(int index) { + assert index >= 0 && index < length(); + return in.get(index); + } + + @Override + public int length() { + return in.length(); + } + } @Override public Bits getLiveDocs() { Bits liveDocs = super.getLiveDocs(); if (liveDocs != null) { assert maxDoc() == liveDocs.length(); + liveDocs = new AssertingBits(liveDocs); } else { assert maxDoc() == numDocs(); assert !hasDeletions(); @@ -628,6 +649,7 @@ public class AssertingAtomicReader extends FilterAtomicReader { assert fi != null; assert fi.hasDocValues(); assert maxDoc() == docsWithField.length(); + docsWithField = new AssertingBits(docsWithField); } else { assert fi == null || fi.hasDocValues() == false; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 8dfaf0cec3f..9c94bb61567 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -26,6 +26,7 @@ import java.util.Map.Entry; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.CountDownLatch; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -673,7 +674,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { iwriter.close(); SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field"); - if (codecSupportsDocsWithField("field")) { + if (defaultCodecSupportsDocsWithField()) { assertEquals(-1, dv.getOrd(0)); assertEquals(0, dv.getValueCount()); } else { @@ -733,7 +734,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { BytesRef scratch = new BytesRef(); dv.lookupOrd(dv.getOrd(0), scratch); assertEquals(new BytesRef("hello world 2"), scratch); - if (codecSupportsDocsWithField("dv")) { + if (defaultCodecSupportsDocsWithField()) { assertEquals(-1, dv.getOrd(1)); } dv.get(1, scratch); @@ -1115,7 +1116,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { doc.add(newTextField("id", "noValue", Field.Store.YES)); w.addDocument(doc); } - if (!codecSupportsDocsWithField("field")) { + if (!defaultCodecSupportsDocsWithField()) { BytesRef bytesRef = new BytesRef(); hash.add(bytesRef); // add empty value for the gaps } @@ -1242,7 +1243,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } private void doTestMissingVsFieldCache(LongProducer longs) throws Exception { - assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); @@ -2345,7 +2346,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } public void testTwoNumbersOneMissing() throws IOException { - 
assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMergePolicy(newLogMergePolicy()); @@ -2374,7 +2375,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } public void testTwoNumbersOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMergePolicy(newLogMergePolicy()); @@ -2404,7 +2405,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } public void testThreeNumbersOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMergePolicy(newLogMergePolicy()); @@ -2440,7 +2441,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } public void testTwoBytesOneMissing() throws IOException { - assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMergePolicy(newLogMergePolicy()); @@ -2472,7 +2473,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } public void testTwoBytesOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMergePolicy(newLogMergePolicy()); @@ -2505,7 +2506,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { } public void testThreeBytesOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField("dv1")); + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMergePolicy(newLogMergePolicy()); @@ -2709,6 +2710,243 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { ar.close(); d.close(); } + + /** Tests dv against stored fields with threads (binary/numeric/sorted, no missing) */ + public void testThreads() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); + Document doc = new Document(); + Field idField = new StringField("id", "", Field.Store.NO); + Field storedBinField = new StoredField("storedBin", new byte[0]); + Field dvBinField = new 
BinaryDocValuesField("dvBin", new BytesRef()); + Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef()); + Field storedNumericField = new StoredField("storedNum", ""); + Field dvNumericField = new NumericDocValuesField("dvNum", 0); + doc.add(idField); + doc.add(storedBinField); + doc.add(dvBinField); + doc.add(dvSortedField); + doc.add(storedNumericField); + doc.add(dvNumericField); + + // index some docs + int numDocs = atLeast(300); + for (int i = 0; i < numDocs; i++) { + idField.setStringValue(Integer.toString(i)); + int length = _TestUtil.nextInt(random(), 0, 8); + byte buffer[] = new byte[length]; + random().nextBytes(buffer); + storedBinField.setBytesValue(buffer); + dvBinField.setBytesValue(buffer); + dvSortedField.setBytesValue(buffer); + long numericValue = random().nextLong(); + storedNumericField.setStringValue(Long.toString(numericValue)); + dvNumericField.setLongValue(numericValue); + writer.addDocument(doc); + if (random().nextInt(31) == 0) { + writer.commit(); + } + } + + // delete some docs + int numDeletions = random().nextInt(numDocs/10); + for (int i = 0; i < numDeletions; i++) { + int id = random().nextInt(numDocs); + writer.deleteDocuments(new Term("id", Integer.toString(id))); + } + writer.close(); + + // compare + final DirectoryReader ir = DirectoryReader.open(dir); + int numThreads = _TestUtil.nextInt(random(), 2, 7); + Thread threads[] = new Thread[numThreads]; + final CountDownLatch startingGun = new CountDownLatch(1); + + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + startingGun.await(); + for (AtomicReaderContext context : ir.leaves()) { + AtomicReader r = context.reader(); + BinaryDocValues binaries = r.getBinaryDocValues("dvBin"); + SortedDocValues sorted = r.getSortedDocValues("dvSorted"); + NumericDocValues numerics = r.getNumericDocValues("dvNum"); + for (int j = 0; j < r.maxDoc(); j++) { + BytesRef binaryValue = r.document(j).getBinaryValue("storedBin"); + BytesRef scratch = new BytesRef(); + binaries.get(j, scratch); + assertEquals(binaryValue, scratch); + sorted.get(j, scratch); + assertEquals(binaryValue, scratch); + String expected = r.document(j).get("storedNum"); + assertEquals(Long.parseLong(expected), numerics.get(j)); + } + } + _TestUtil.checkReader(ir); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + threads[i].start(); + } + startingGun.countDown(); + for (Thread t : threads) { + t.join(); + } + ir.close(); + dir.close(); + } + + /** Tests dv against stored fields with threads (all types + missing) */ + public void testThreads2() throws Exception { + assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); + Field idField = new StringField("id", "", Field.Store.NO); + Field storedBinField = new StoredField("storedBin", new byte[0]); + Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef()); + Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef()); + Field storedNumericField = new StoredField("storedNum", ""); + Field dvNumericField = new NumericDocValuesField("dvNum", 0); + + // index some docs + int numDocs = atLeast(300); + for (int i = 0; i < numDocs; i++) { + 
idField.setStringValue(Integer.toString(i)); + int length = _TestUtil.nextInt(random(), 0, 8); + byte buffer[] = new byte[length]; + random().nextBytes(buffer); + storedBinField.setBytesValue(buffer); + dvBinField.setBytesValue(buffer); + dvSortedField.setBytesValue(buffer); + long numericValue = random().nextLong(); + storedNumericField.setStringValue(Long.toString(numericValue)); + dvNumericField.setLongValue(numericValue); + Document doc = new Document(); + doc.add(idField); + if (random().nextInt(4) > 0) { + doc.add(storedBinField); + doc.add(dvBinField); + doc.add(dvSortedField); + } + if (random().nextInt(4) > 0) { + doc.add(storedNumericField); + doc.add(dvNumericField); + } + int numSortedSetFields = random().nextInt(3); + Set values = new TreeSet(); + for (int j = 0; j < numSortedSetFields; j++) { + values.add(_TestUtil.randomSimpleString(random())); + } + for (String v : values) { + doc.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v))); + doc.add(new StoredField("storedSortedSet", v)); + } + writer.addDocument(doc); + if (random().nextInt(31) == 0) { + writer.commit(); + } + } + + // delete some docs + int numDeletions = random().nextInt(numDocs/10); + for (int i = 0; i < numDeletions; i++) { + int id = random().nextInt(numDocs); + writer.deleteDocuments(new Term("id", Integer.toString(id))); + } + writer.close(); + + // compare + final DirectoryReader ir = DirectoryReader.open(dir); + int numThreads = _TestUtil.nextInt(random(), 2, 7); + Thread threads[] = new Thread[numThreads]; + final CountDownLatch startingGun = new CountDownLatch(1); + + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + startingGun.await(); + for (AtomicReaderContext context : ir.leaves()) { + AtomicReader r = context.reader(); + BinaryDocValues binaries = r.getBinaryDocValues("dvBin"); + Bits binaryBits = r.getDocsWithField("dvBin"); + SortedDocValues sorted = r.getSortedDocValues("dvSorted"); + Bits sortedBits = r.getDocsWithField("dvSorted"); + NumericDocValues numerics = r.getNumericDocValues("dvNum"); + Bits numericBits = r.getDocsWithField("dvNum"); + SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet"); + Bits sortedSetBits = r.getDocsWithField("dvSortedSet"); + for (int j = 0; j < r.maxDoc(); j++) { + BytesRef binaryValue = r.document(j).getBinaryValue("storedBin"); + if (binaryValue != null) { + if (binaries != null) { + BytesRef scratch = new BytesRef(); + binaries.get(j, scratch); + assertEquals(binaryValue, scratch); + sorted.get(j, scratch); + assertEquals(binaryValue, scratch); + assertTrue(binaryBits.get(j)); + assertTrue(sortedBits.get(j)); + } + } else if (binaries != null) { + assertFalse(binaryBits.get(j)); + assertFalse(sortedBits.get(j)); + assertEquals(-1, sorted.getOrd(j)); + } + + String number = r.document(j).get("storedNum"); + if (number != null) { + if (numerics != null) { + assertEquals(Long.parseLong(number), numerics.get(j)); + } + } else if (numerics != null) { + assertFalse(numericBits.get(j)); + assertEquals(0, numerics.get(j)); + } + + String values[] = r.document(j).getValues("storedSortedSet"); + if (values.length > 0) { + assertNotNull(sortedSet); + sortedSet.setDocument(j); + for (int i = 0; i < values.length; i++) { + long ord = sortedSet.nextOrd(); + assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS); + BytesRef value = new BytesRef(); + sortedSet.lookupOrd(ord, value); + assertEquals(values[i], value.utf8ToString()); + } + assertEquals(SortedSetDocValues.NO_MORE_ORDS, 
sortedSet.nextOrd()); + assertTrue(sortedSetBits.get(j)); + } else if (sortedSet != null) { + sortedSet.setDocument(j); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); + assertFalse(sortedSetBits.get(j)); + } + } + } + _TestUtil.checkReader(ir); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + threads[i].start(); + } + startingGun.countDown(); + for (Thread t : threads) { + t.join(); + } + ir.close(); + dir.close(); + } protected boolean codecAcceptsHugeBinaryValues(String field) { return true; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 27f736b8706..6f9f2b3e749 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -1369,11 +1369,11 @@ public abstract class LuceneTestCase extends Assert { return true; } - /** Returns true if the codec for the field "supports" docsWithField + /** Returns true if the codec "supports" docsWithField * (other codecs return MatchAllBits, because you couldnt write missing values before) */ - public static boolean codecSupportsDocsWithField(String field) { - String name = _TestUtil.getDocValuesFormat(Codec.getDefault(), field); - if (name.equals("Lucene40") || name.equals("Lucene42")) { + public static boolean defaultCodecSupportsDocsWithField() { + String name = Codec.getDefault().getName(); + if (name.equals("Lucene40") || name.equals("Lucene41") || name.equals("Lucene42")) { return false; } return true; From 424369f49140e3a69e9784013f23e52b57c0be23 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 20 Aug 2013 02:46:13 +0000 Subject: [PATCH 14/16] add test git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515689 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/TestMultiDocValues.java | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java index 78c8974c555..8944dd66b67 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java @@ -26,6 +26,7 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -321,4 +322,52 @@ public class TestMultiDocValues extends LuceneTestCase { ir2.close(); dir.close(); } + + public void testDocsWithField() throws Exception { + assumeTrue("codec does not support docsWithField", defaultCodecSupportsDocsWithField()); + Directory dir = newDirectory(); + + IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + if (random().nextInt(4) >= 0) { + doc.add(new NumericDocValuesField("numbers", random().nextLong())); + } + doc.add(new NumericDocValuesField("numbersAlways", random().nextLong())); + 
iw.addDocument(doc); + if (random().nextInt(17) == 0) { + iw.commit(); + } + } + DirectoryReader ir = iw.getReader(); + iw.forceMerge(1); + DirectoryReader ir2 = iw.getReader(); + AtomicReader merged = getOnlySegmentReader(ir2); + iw.close(); + + Bits multi = MultiDocValues.getDocsWithField(ir, "numbers"); + Bits single = merged.getDocsWithField("numbers"); + if (multi == null) { + assertNull(single); + } else { + assertEquals(single.length(), multi.length()); + for (int i = 0; i < numDocs; i++) { + assertEquals(single.get(i), multi.get(i)); + } + } + + multi = MultiDocValues.getDocsWithField(ir, "numbersAlways"); + single = merged.getDocsWithField("numbersAlways"); + assertEquals(single.length(), multi.length()); + for (int i = 0; i < numDocs; i++) { + assertEquals(single.get(i), multi.get(i)); + } + ir.close(); + ir2.close(); + dir.close(); + } } From d15e95bd050c26b7c15cc57303d4a0cd52089e68 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 20 Aug 2013 16:50:56 +0000 Subject: [PATCH 15/16] beef up TestDuelingCodecs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515870 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/TestDuelingCodecs.java | 6 ++++++ .../org/apache/lucene/util/LuceneTestCase.java | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java index e222258bf0a..25fef8ca367 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -134,6 +135,11 @@ public class TestDuelingCodecs extends LuceneTestCase { for (String trash : split) { document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash))); } + // add a numeric dv field sometimes + document.removeFields("sparsenumeric"); + if (random.nextInt(4) == 2) { + document.add(new NumericDocValuesField("sparsenumeric", random.nextInt())); + } writer.addDocument(document); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 6f9f2b3e749..1ca13974195 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -1972,6 +1972,20 @@ public abstract class LuceneTestCase extends Assert { assertNull(info, rightValues); } } + + { + Bits leftBits = MultiDocValues.getDocsWithField(leftReader, field); + Bits rightBits = MultiDocValues.getDocsWithField(rightReader, field); + if (leftBits != null && rightBits != null) { + assertEquals(info, leftBits.length(), rightBits.length()); + for (int i = 0; i < leftBits.length(); i++) { + assertEquals(info, leftBits.get(i), rightBits.get(i)); + } + } else { + assertNull(info, leftBits); + assertNull(info, rightBits); + } + } } } From 2bbc869516c222ad0ddc56838a3c32010d0d46cc Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 20 Aug 2013 20:37:06 +0000 Subject: [PATCH 16/16] add a solr test 
for missing dv git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515959 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/schema/SchemaField.java | 12 +- .../conf/schema-docValuesMissing.xml | 61 +++ .../solr/schema/DocValuesMissingTest.java | 479 ++++++++++++++++++ 3 files changed, 550 insertions(+), 2 deletions(-) create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml create mode 100644 solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaField.java b/solr/core/src/java/org/apache/solr/schema/SchemaField.java index 5a1fb5b82aa..ca6bd90d1b5 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaField.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaField.java @@ -239,13 +239,21 @@ public final class SchemaField extends FieldProperties { if (on(falseProps,INDEXED)) { int pp = (INDEXED - | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS - | SORT_MISSING_FIRST | SORT_MISSING_LAST); + | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS); if (on(pp,trueProps)) { throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed field:" + props); } p &= ~pp; } + + if (on(falseProps,INDEXED) && on(falseProps,DOC_VALUES)) { + int pp = (SORT_MISSING_FIRST | SORT_MISSING_LAST); + if (on(pp,trueProps)) { + throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed/non-docValues field:" + props); + } + p &= ~pp; + } + if (on(falseProps,INDEXED)) { int pp = (OMIT_NORMS | OMIT_TF_POSITIONS | OMIT_POSITIONS); if (on(pp,falseProps)) { diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml new file mode 100644 index 00000000000..05f73e15f29 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml @@ -0,0 +1,61 @@ + [61-line schema body lost to extraction: per the tests below it defines docValues field types and fields floatdv, intdv, longdv, doubledv, datedv and stringdv, each with _missingfirst (sortMissingFirst="true") and _missinglast (sortMissingLast="true") variants, plus uniqueKey id] + diff --git a/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java b/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java new file mode 100644 index 00000000000..a0dc27bc70c --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java @@ -0,0 +1,479 @@ +package org.apache.solr.schema; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; + +/** + * Tests things like sorting on docvalues with missing values + */ +@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // old formats cannot represent missing values +public class DocValuesMissingTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-basic.xml", "schema-docValuesMissing.xml"); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + } + + /** float with default lucene sort (treats as 0) */ + public void testFloatSort() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "floatdv", "-1.3")); + assertU(adoc("id", "2", "floatdv", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "floatdv asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "floatdv desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** float with sort missing always first */ + public void testFloatSortMissingFirst() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "floatdv_missingfirst", "-1.3")); + assertU(adoc("id", "2", "floatdv_missingfirst", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "floatdv_missingfirst asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "floatdv_missingfirst desc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** float with sort missing always last */ + public void testFloatSortMissingLast() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "floatdv_missinglast", "-1.3")); + assertU(adoc("id", "2", "floatdv_missinglast", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "floatdv_missinglast asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=0]"); + assertQ(req("q", "*:*", "sort", "floatdv_missinglast desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** float function query based on missing */ + public void testFloatMissingFunction() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "floatdv", "-1.3")); + assertU(adoc("id", "2", "floatdv", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "fl", "e:exists(floatdv)", "sort", "id asc"), + "//result/doc[1]/bool[@name='e'][.='false']", + "//result/doc[2]/bool[@name='e'][.='true']", + "//result/doc[3]/bool[@name='e'][.='true']"); + } + + /** float missing facet count */ + public void testFloatMissingFacet() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1")); // missing + assertU(adoc("id", "2", "floatdv", "-1.3")); + assertU(adoc("id", "3", "floatdv", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "facet", "true", "facet.field", "floatdv", "facet.mincount", "1", "facet.missing", "true"), + "//lst[@name='facet_fields']/lst[@name='floatdv']/int[@name='-1.3'][.=1]", + 
"//lst[@name='facet_fields']/lst[@name='floatdv']/int[@name='4.2'][.=1]", + "//lst[@name='facet_fields']/lst[@name='floatdv']/int[.=2]"); + } + + /** int with default lucene sort (treats as 0) */ + public void testIntSort() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "intdv", "-1")); + assertU(adoc("id", "2", "intdv", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "intdv asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "intdv desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** int with sort missing always first */ + public void testIntSortMissingFirst() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "intdv_missingfirst", "-1")); + assertU(adoc("id", "2", "intdv_missingfirst", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "intdv_missingfirst asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "intdv_missingfirst desc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** int with sort missing always last */ + public void testIntSortMissingLast() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "intdv_missinglast", "-1")); + assertU(adoc("id", "2", "intdv_missinglast", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "intdv_missinglast asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=0]"); + assertQ(req("q", "*:*", "sort", "intdv_missinglast desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** int function query based on missing */ + public void testIntMissingFunction() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "intdv", "-1")); + assertU(adoc("id", "2", "intdv", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "fl", "e:exists(intdv)", "sort", "id asc"), + "//result/doc[1]/bool[@name='e'][.='false']", + "//result/doc[2]/bool[@name='e'][.='true']", + "//result/doc[3]/bool[@name='e'][.='true']"); + } + + /** int missing facet count */ + public void testIntMissingFacet() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1")); // missing + assertU(adoc("id", "2", "intdv", "-1")); + assertU(adoc("id", "3", "intdv", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "facet", "true", "facet.field", "intdv", "facet.mincount", "1", "facet.missing", "true"), + "//lst[@name='facet_fields']/lst[@name='intdv']/int[@name='-1'][.=1]", + "//lst[@name='facet_fields']/lst[@name='intdv']/int[@name='4'][.=1]", + "//lst[@name='facet_fields']/lst[@name='intdv']/int[.=2]"); + } + + /** double with default lucene sort (treats as 0) */ + public void testDoubleSort() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "doubledv", "-1.3")); + assertU(adoc("id", "2", "doubledv", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "doubledv asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=0]", + 
"//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "doubledv desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** double with sort missing always first */ + public void testDoubleSortMissingFirst() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "doubledv_missingfirst", "-1.3")); + assertU(adoc("id", "2", "doubledv_missingfirst", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "doubledv_missingfirst asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "doubledv_missingfirst desc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** double with sort missing always last */ + public void testDoubleSortMissingLast() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "doubledv_missinglast", "-1.3")); + assertU(adoc("id", "2", "doubledv_missinglast", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "doubledv_missinglast asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=0]"); + assertQ(req("q", "*:*", "sort", "doubledv_missinglast desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** double function query based on missing */ + public void testDoubleMissingFunction() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "doubledv", "-1.3")); + assertU(adoc("id", "2", "doubledv", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "fl", "e:exists(doubledv)", "sort", "id asc"), + "//result/doc[1]/bool[@name='e'][.='false']", + "//result/doc[2]/bool[@name='e'][.='true']", + "//result/doc[3]/bool[@name='e'][.='true']"); + } + + /** double missing facet count */ + public void testDoubleMissingFacet() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1")); // missing + assertU(adoc("id", "2", "doubledv", "-1.3")); + assertU(adoc("id", "3", "doubledv", "4.2")); + assertU(commit()); + assertQ(req("q", "*:*", "facet", "true", "facet.field", "doubledv", "facet.mincount", "1", "facet.missing", "true"), + "//lst[@name='facet_fields']/lst[@name='doubledv']/int[@name='-1.3'][.=1]", + "//lst[@name='facet_fields']/lst[@name='doubledv']/int[@name='4.2'][.=1]", + "//lst[@name='facet_fields']/lst[@name='doubledv']/int[.=2]"); + } + + /** long with default lucene sort (treats as 0) */ + public void testLongSort() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "longdv", "-1")); + assertU(adoc("id", "2", "longdv", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "longdv asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "longdv desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** long with sort missing always first */ + public void testLongSortMissingFirst() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "longdv_missingfirst", "-1")); + assertU(adoc("id", "2", "longdv_missingfirst", 
"4")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "longdv_missingfirst asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "longdv_missingfirst desc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** long with sort missing always last */ + public void testLongSortMissingLast() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "longdv_missinglast", "-1")); + assertU(adoc("id", "2", "longdv_missinglast", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "longdv_missinglast asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=0]"); + assertQ(req("q", "*:*", "sort", "longdv_missinglast desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** long function query based on missing */ + public void testLongMissingFunction() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "longdv", "-1")); + assertU(adoc("id", "2", "longdv", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "fl", "e:exists(longdv)", "sort", "id asc"), + "//result/doc[1]/bool[@name='e'][.='false']", + "//result/doc[2]/bool[@name='e'][.='true']", + "//result/doc[3]/bool[@name='e'][.='true']"); + } + + /** long missing facet count */ + public void testLongMissingFacet() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1")); // missing + assertU(adoc("id", "2", "longdv", "-1")); + assertU(adoc("id", "3", "longdv", "4")); + assertU(commit()); + assertQ(req("q", "*:*", "facet", "true", "facet.field", "longdv", "facet.mincount", "1", "facet.missing", "true"), + "//lst[@name='facet_fields']/lst[@name='longdv']/int[@name='-1'][.=1]", + "//lst[@name='facet_fields']/lst[@name='longdv']/int[@name='4'][.=1]", + "//lst[@name='facet_fields']/lst[@name='longdv']/int[.=2]"); + } + + /** date with default lucene sort (treats as 1970) */ + public void testDateSort() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "datedv", "1900-12-31T23:59:59.999Z")); + assertU(adoc("id", "2", "datedv", "2005-12-31T23:59:59.999Z")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "datedv asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "datedv desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=0]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** date with sort missing always first */ + public void testDateSortMissingFirst() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "datedv_missingfirst", "1900-12-31T23:59:59.999Z")); + assertU(adoc("id", "2", "datedv_missingfirst", "2005-12-31T23:59:59.999Z")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "datedv_missingfirst asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "datedv_missingfirst desc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** date with sort missing always last 
*/ + public void testDateSortMissingLast() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "datedv_missinglast", "1900-12-31T23:59:59.999Z")); + assertU(adoc("id", "2", "datedv_missinglast", "2005-12-31T23:59:59.999Z")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "datedv_missinglast asc"), + "//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=0]"); + assertQ(req("q", "*:*", "sort", "datedv_missinglast desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** date function query based on missing */ + public void testDateMissingFunction() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "datedv", "1900-12-31T23:59:59.999Z")); + assertU(adoc("id", "2", "datedv", "2005-12-31T23:59:59.999Z")); + assertU(commit()); + assertQ(req("q", "*:*", "fl", "e:exists(datedv)", "sort", "id asc"), + "//result/doc[1]/bool[@name='e'][.='false']", + "//result/doc[2]/bool[@name='e'][.='true']", + "//result/doc[3]/bool[@name='e'][.='true']"); + } + + /** date missing facet count */ + public void testDateMissingFacet() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1")); // missing + assertU(adoc("id", "2", "datedv", "1900-12-31T23:59:59.999Z")); + assertU(adoc("id", "3", "datedv", "2005-12-31T23:59:59.999Z")); + assertU(commit()); + assertQ(req("q", "*:*", "facet", "true", "facet.field", "datedv", "facet.mincount", "1", "facet.missing", "true"), + "//lst[@name='facet_fields']/lst[@name='datedv']/int[@name='1900-12-31T23:59:59.999Z'][.=1]", + "//lst[@name='facet_fields']/lst[@name='datedv']/int[@name='2005-12-31T23:59:59.999Z'][.=1]", + "//lst[@name='facet_fields']/lst[@name='datedv']/int[.=2]"); + } + + /** string with default lucene sort (treats as "") */ + public void testStringSort() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "stringdv", "a")); + assertU(adoc("id", "2", "stringdv", "z")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "stringdv asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "stringdv desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** string with sort missing always first */ + public void testStringSortMissingFirst() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "stringdv_missingfirst", "a")); + assertU(adoc("id", "2", "stringdv_missingfirst", "z")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "stringdv_missingfirst asc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=2]"); + assertQ(req("q", "*:*", "sort", "stringdv_missingfirst desc"), + "//result/doc[1]/str[@name='id'][.=0]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=1]"); + } + + /** string with sort missing always last */ + public void testStringSortMissingLast() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "stringdv_missinglast", "a")); + assertU(adoc("id", "2", "stringdv_missinglast", "z")); + assertU(commit()); + assertQ(req("q", "*:*", "sort", "stringdv_missinglast asc"), + 
"//result/doc[1]/str[@name='id'][.=1]", + "//result/doc[2]/str[@name='id'][.=2]", + "//result/doc[3]/str[@name='id'][.=0]"); + assertQ(req("q", "*:*", "sort", "stringdv_missinglast desc"), + "//result/doc[1]/str[@name='id'][.=2]", + "//result/doc[2]/str[@name='id'][.=1]", + "//result/doc[3]/str[@name='id'][.=0]"); + } + + /** string function query based on missing */ + public void testStringMissingFunction() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1", "stringdv", "a")); + assertU(adoc("id", "2", "stringdv", "z")); + assertU(commit()); + assertQ(req("q", "*:*", "fl", "e:exists(stringdv)", "sort", "id asc"), + "//result/doc[1]/bool[@name='e'][.='false']", + "//result/doc[2]/bool[@name='e'][.='true']", + "//result/doc[3]/bool[@name='e'][.='true']"); + } + + /** string missing facet count */ + public void testStringMissingFacet() throws Exception { + assertU(adoc("id", "0")); // missing + assertU(adoc("id", "1")); // missing + assertU(adoc("id", "2", "stringdv", "a")); + assertU(adoc("id", "3", "stringdv", "z")); + assertU(commit()); + assertQ(req("q", "*:*", "facet", "true", "facet.field", "stringdv", "facet.mincount", "1", "facet.missing", "true"), + "//lst[@name='facet_fields']/lst[@name='stringdv']/int[@name='a'][.=1]", + "//lst[@name='facet_fields']/lst[@name='stringdv']/int[@name='z'][.=1]", + "//lst[@name='facet_fields']/lst[@name='stringdv']/int[.=2]"); + } +}