LUCENE-7475: Make norms sparse.

Adrien Grand 2016-10-05 14:07:15 +02:00
parent 28d187acd1
commit 9128bdbaf5
36 changed files with 1219 additions and 107 deletions

View File

@ -22,6 +22,9 @@ API Changes
to iterators, enabling future codec compression improvements. (Mike
McCandless)
* LUCENE-7475: Norms now support sparsity, allowing users to pay only for
what is actually used. (Adrien Grand)
Bug Fixes
Improvements

View File

@ -74,7 +74,7 @@ public class Lucene53NormsFormat extends NormsFormat {
@Override
public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
return new Lucene53NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
throw new UnsupportedOperationException("This format can only be used for reading");
}
@Override
@ -82,10 +82,10 @@ public class Lucene53NormsFormat extends NormsFormat {
return new Lucene53NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
private static final String DATA_CODEC = "Lucene53NormsData";
private static final String DATA_EXTENSION = "nvd";
private static final String METADATA_CODEC = "Lucene53NormsMetadata";
private static final String METADATA_EXTENSION = "nvm";
static final String DATA_CODEC = "Lucene53NormsData";
static final String DATA_EXTENSION = "nvd";
static final String METADATA_CODEC = "Lucene53NormsMetadata";
static final String METADATA_EXTENSION = "nvm";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
}

View File

@ -17,7 +17,7 @@
/**
* Components from the Lucene 5.3 index format
* See {@link org.apache.lucene.codecs.lucene54} for an overview
* See {@link org.apache.lucene.codecs.lucene53} for an overview
* of the index format.
*/
package org.apache.lucene.codecs.lucene53;

View File

@ -171,7 +171,7 @@ public class Lucene60Codec extends Codec {
private final NormsFormat normsFormat = new Lucene53NormsFormat();
@Override
public final NormsFormat normsFormat() {
public NormsFormat normsFormat() {
return normsFormat;
}
}

View File

@ -32,9 +32,9 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat;
@ -170,7 +170,7 @@ public class Lucene62Codec extends Codec {
private final NormsFormat normsFormat = new Lucene53NormsFormat();
@Override
public final NormsFormat normsFormat() {
public NormsFormat normsFormat() {
return normsFormat;
}
}

View File

@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Lucene 6.2 file format.
</body>
</html>

View File

@ -14,3 +14,4 @@
# limitations under the License.
org.apache.lucene.codecs.lucene60.Lucene60Codec
org.apache.lucene.codecs.lucene62.Lucene62Codec

View File

@ -13,3 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat

View File

@ -58,7 +58,7 @@ class Lucene53NormsConsumer extends NormsConsumer {
@Override
public void addNormsField(FieldInfo field, NormsProducer normsProducer) throws IOException {
addNormsField(field, LegacyDocValuesIterables.normsIterable(field, normsProducer, maxDoc));
addNormsField(field, LegacyDocValuesIterables.normsIterable(field, normsProducer, maxDoc, true));
}
private void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene53;
import java.io.IOException;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.index.SegmentWriteState;
public class Lucene53RWNormsFormat extends Lucene53NormsFormat {
@Override
public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
return new Lucene53NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
}

View File

@ -16,19 +16,23 @@
*/
package org.apache.lucene.codecs.lucene53;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.codecs.lucene62.Lucene62RWCodec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
/**
* Tests Lucene53NormsFormat
*/
public class TestLucene53NormsFormat extends BaseNormsFormatTestCase {
private final Codec codec = new Lucene70Codec();
private final Codec codec = new Lucene62RWCodec();
@Override
protected Codec getCodec() {
return codec;
}
}
@Override
protected boolean codecSupportsSparsity() {
return false;
}
}

View File

@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene62;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.lucene53.Lucene53RWNormsFormat;
import org.apache.lucene.codecs.lucene62.Lucene62Codec;
public class Lucene62RWCodec extends Lucene62Codec {
private final NormsFormat normsFormat = new Lucene53RWNormsFormat();
@Override
public NormsFormat normsFormat() {
return normsFormat;
}
}

View File

@ -187,7 +187,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
};
}
private Bits getNumericDocsWithField(FieldInfo fieldInfo) throws IOException {
public Bits getNumericDocsWithField(FieldInfo fieldInfo) throws IOException {
final OneField field = fields.get(fieldInfo.name);
final IndexInput in = data.clone();
final BytesRefBuilder scratch = new BytesRefBuilder();

View File

@ -30,7 +30,6 @@ import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
/**
* plain-text norms format.
@ -70,7 +69,7 @@ public class SimpleTextNormsFormat extends NormsFormat {
@Override
public NumericDocValues getNorms(FieldInfo field) throws IOException {
return new LegacyNumericDocValuesWrapper(new Bits.MatchAllBits(impl.maxDoc), impl.getNumericNonIterator(field));
return new LegacyNumericDocValuesWrapper(impl.getNumericDocsWithField(field), impl.getNumericNonIterator(field));
}
@Override
@ -117,7 +116,7 @@ public class SimpleTextNormsFormat extends NormsFormat {
@Override
public void addNormsField(FieldInfo field, NormsProducer normsProducer) throws IOException {
impl.addNumericField(field, LegacyDocValuesIterables.normsIterable(field, normsProducer, impl.numDocs));
impl.addNumericField(field, LegacyDocValuesIterables.normsIterable(field, normsProducer, impl.numDocs, false));
}
@Override

View File

@ -372,7 +372,8 @@ public class LegacyDocValuesIterables {
*
* @deprecated Consume {@link NumericDocValues} instead. */
@Deprecated
public static Iterable<Number> normsIterable(final FieldInfo field, final NormsProducer normsProducer, final int maxDoc) {
public static Iterable<Number> normsIterable(final FieldInfo field,
final NormsProducer normsProducer, final int maxDoc, boolean missingAsZero) {
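// missingAsZero selects the legacy behavior of reporting 0 for documents without a norm; otherwise such documents yield null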
return new Iterable<Number>() {
@ -411,9 +412,11 @@ public class LegacyDocValuesIterables {
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
} else {
} else if (missingAsZero) {
// Unlike NumericDocValues, norms should return 0 for missing values:
result = 0;
} else {
result = null;
}
return result;
}

View File

@ -17,7 +17,7 @@
/**
* Components from the Lucene 5.0 index format
* See {@link org.apache.lucene.codecs.lucene53} for an overview
* See {@link org.apache.lucene.codecs.lucene50} for an overview
* of the index format.
*/
package org.apache.lucene.codecs.lucene50;

View File

@ -35,7 +35,6 @@ import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat;
import org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat;
@ -168,7 +167,7 @@ public class Lucene70Codec extends Codec {
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene70");
private final NormsFormat normsFormat = new Lucene53NormsFormat();
private final NormsFormat normsFormat = new Lucene70NormsFormat();
@Override
public final NormsFormat normsFormat() {

View File

@ -0,0 +1,155 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene70;
import static org.apache.lucene.codecs.lucene70.Lucene70NormsFormat.VERSION_CURRENT;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Writer for {@link Lucene70NormsFormat}
*/
final class Lucene70NormsConsumer extends NormsConsumer {
IndexOutput data, meta;
final int maxDoc;
Lucene70NormsConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
boolean success = false;
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.createOutput(dataName, state.context);
CodecUtil.writeIndexHeader(data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
meta = state.directory.createOutput(metaName, state.context);
CodecUtil.writeIndexHeader(meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
maxDoc = state.segmentInfo.maxDoc();
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this);
}
}
}
@Override
public void close() throws IOException {
boolean success = false;
try {
if (meta != null) {
meta.writeInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data); // write checksum
}
success = true;
} finally {
if (success) {
IOUtils.close(data, meta);
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
meta = data = null;
}
}
@Override
public void addNormsField(FieldInfo field, NormsProducer normsProducer) throws IOException {
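// first pass over the norms: count how many documents have a value and track min/max to size the encoding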
NumericDocValues values = normsProducer.getNorms(field);
int numDocsWithValue = 0;
long min = Long.MAX_VALUE;
long max = Long.MIN_VALUE;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
numDocsWithValue++;
long v = values.longValue();
min = Math.min(min, v);
max = Math.max(max, v);
}
assert numDocsWithValue <= maxDoc;
meta.writeInt(field.number);
if (numDocsWithValue == 0) {
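// no document has a norm for this field: -2 marks an empty docsWithField set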
meta.writeLong(-2);
} else if (numDocsWithValue == maxDoc) {
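// every document has a norm: -1 means no bit set needs to be stored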
meta.writeLong(-1);
} else {
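// sparse: record where the bit set of documents that have a norm starts in the data file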
meta.writeLong(data.getFilePointer());
values = normsProducer.getNorms(field);
SparseDISI.writeBitSet(values, maxDoc, data);
}
meta.writeInt(numDocsWithValue);
int numBytesPerValue = numBytesPerValue(min, max);
meta.writeByte((byte) numBytesPerValue);
if (numBytesPerValue == 0) {
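// all norms are equal: store the single value inline in the metadata instead of a data-file pointer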
meta.writeLong(min);
} else {
meta.writeLong(data.getFilePointer());
values = normsProducer.getNorms(field);
writeValues(values, numBytesPerValue, data);
}
}
private int numBytesPerValue(long min, long max) {
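// min > max means no values were seen, and min == max means a single distinct value; both encode with 0 bytes per value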
if (min >= max) {
return 0;
} else if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
return 1;
} else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
return 2;
} else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) {
return 4;
} else {
return 8;
}
}
private void writeValues(NumericDocValues values, int numBytesPerValue, IndexOutput out) throws IOException {
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
long value = values.longValue();
switch (numBytesPerValue) {
case 1:
out.writeByte((byte) value);
break;
case 2:
out.writeShort((short) value);
break;
case 4:
out.writeInt((int) value);
break;
case 8:
out.writeLong(value);
break;
default:
throw new AssertionError();
}
}
}
}

View File

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene70;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
/**
* Lucene 7.0 Score normalization format.
* <p>
* Encodes normalization values by encoding each value with the minimum
* number of bytes needed to represent the range (which can be zero).
* <p>
* Files:
* <ol>
* <li><tt>.nvd</tt>: Norms data</li>
* <li><tt>.nvm</tt>: Norms metadata</li>
* </ol>
* <ol>
* <li><a name="nvm"></a>
* <p>The Norms metadata or .nvm file.</p>
* <p>For each norms field, this stores metadata, such as the offset into the
* Norms data (.nvd)</p>
* <p>Norms metadata (.nvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
* <li>Entry --&gt; FieldNumber, DocsWithFieldAddress, NumDocsWithField, BytesPerNorm, NormsAddress</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>DocsWithFieldAddress --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>NumDocsWithField --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>BytesPerNorm --&gt; {@link DataOutput#writeByte byte}</li>
* <li>NormsAddress --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>FieldNumber of -1 indicates the end of metadata.</p>
* <p>NormsAddress is the pointer to the start of the data in the norms data (.nvd), or the singleton value
when BytesPerNorm = 0. If BytesPerNorm is different from 0 then there are NumDocsWithField values
* to read at that offset.</p>
* <p>DocsWithFieldAddress is the pointer to the start of the bit set containing documents that have a norm
* in the norms data (.nvd), or -2 if no documents have a norm value, or -1 if all documents have a norm
* value.</p>
* <li><a name="nvd"></a>
* <p>The Norms data or .nvd file.</p>
* <p>For each Norms field, this stores the actual per-document data (the heavy lifting)</p>
* <p>Norms data (.nvd) --&gt; Header,&lt; Data &gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
* <li>DocsWithFieldData --&gt; Bit set of MaxDoc bits</li>
* <li>NormsData --&gt; {@link DataOutput#writeByte(byte) byte}<sup>NumDocsWithField * BytesPerNorm</sup></li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </ol>
* @lucene.experimental
*/
public class Lucene70NormsFormat extends NormsFormat {
/** Sole Constructor */
public Lucene70NormsFormat() {}
@Override
public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
return new Lucene70NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
@Override
public NormsProducer normsProducer(SegmentReadState state) throws IOException {
return new Lucene70NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
private static final String DATA_CODEC = "Lucene70NormsData";
private static final String DATA_EXTENSION = "nvd";
private static final String METADATA_CODEC = "Lucene70NormsMetadata";
private static final String METADATA_EXTENSION = "nvm";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
}

View File

@ -0,0 +1,271 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene70;
import static org.apache.lucene.codecs.lucene70.Lucene70NormsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene70.Lucene70NormsFormat.VERSION_START;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.IOUtils;
/**
* Reader for {@link Lucene70NormsFormat}
*/
final class Lucene70NormsProducer extends NormsProducer {
// metadata maps (just file pointers and minimal stuff)
private final Map<Integer,NormsEntry> norms = new HashMap<>();
private final IndexInput data;
private final int maxDoc;
Lucene70NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.maxDoc();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
int version = -1;
// read in the entries from the metadata file.
try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
Throwable priorE = null;
try {
version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
readFields(in, state.fieldInfos);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(in, priorE);
}
}
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.openInput(dataName, state.context);
boolean success = false;
try {
final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch: meta=" + version + ",data=" + version2, data);
}
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
CodecUtil.retrieveChecksum(data);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this.data);
}
}
}
static class NormsEntry {
byte bytesPerNorm;
long docsWithFieldOffset;
int numDocsWithField;
long normsOffset;
}
static abstract class LongValues {
abstract long get(int index) throws IOException;
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
FieldInfo info = infos.fieldInfo(fieldNumber);
if (info == null) {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
} else if (!info.hasNorms()) {
throw new CorruptIndexException("Invalid field: " + info.name, meta);
}
NormsEntry entry = new NormsEntry();
entry.docsWithFieldOffset = meta.readLong();
entry.numDocsWithField = meta.readInt();
entry.bytesPerNorm = meta.readByte();
switch (entry.bytesPerNorm) {
case 0: case 1: case 2: case 4: case 8:
break;
default:
throw new CorruptIndexException("Invalid bytesPerValue: " + entry.bytesPerNorm + ", field: " + info.name, meta);
}
entry.normsOffset = meta.readLong();
norms.put(info.number, entry);
}
}
@Override
public NumericDocValues getNorms(FieldInfo field) throws IOException {
final NormsEntry entry = norms.get(field.number);
if (entry.docsWithFieldOffset == -2) {
// empty
return DocValues.emptyNumeric();
} else if (entry.docsWithFieldOffset == -1) {
// dense
final LongValues normValues = getNormValues(entry);
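// every document has a value, so iteration is a plain doc ID counter and values are addressed directly by doc ID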
return new NumericDocValues() {
int doc = -1;
@Override
public long longValue() throws IOException {
return normValues.get(doc);
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
return doc = target;
}
@Override
public long cost() {
return maxDoc;
}
};
} else {
// sparse
final LongValues normValues = getNormValues(entry);
final SparseDISI disi;
synchronized (data) {
disi = new SparseDISI(maxDoc, data, entry.docsWithFieldOffset, entry.numDocsWithField);
}
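// doc ID navigation delegates to the bit-set iterator; longValue() reads the value at the current ordinal (disi.index())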
return new NumericDocValues() {
@Override
public int advance(int target) throws IOException {
return disi.advance(target);
}
@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}
@Override
public int docID() {
return disi.docID();
}
@Override
public long cost() {
return entry.numDocsWithField;
}
@Override
public long longValue() throws IOException {
return normValues.get(disi.index());
}
};
}
}
private LongValues getNormValues(NormsEntry entry) throws IOException {
if (entry.bytesPerNorm == 0) {
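// 0 bytes per norm: normsOffset holds the constant norm value itself rather than a file pointer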
return new LongValues() {
@Override
long get(int index) {
return entry.normsOffset;
}
};
} else {
RandomAccessInput slice;
synchronized (data) {
slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm);
}
switch (entry.bytesPerNorm) {
case 1:
return new LongValues() {
@Override
long get(int index) throws IOException {
return slice.readByte(index);
}
};
case 2:
return new LongValues() {
@Override
long get(int index) throws IOException {
return slice.readShort(((long) index) << 1);
}
};
case 4:
return new LongValues() {
@Override
long get(int index) throws IOException {
return slice.readInt(((long) index) << 2);
}
};
case 8:
return new LongValues() {
@Override
long get(int index) throws IOException {
return slice.readLong(((long) index) << 3);
}
};
default:
// should not happen, we already validate bytesPerNorm in readFields
throw new AssertionError();
}
}
}
@Override
public void close() throws IOException {
data.close();
}
@Override
public long ramBytesUsed() {
return 64L * norms.size(); // good enough
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(data);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(fields=" + norms.size() + ")";
}
}

View File

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene70;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
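/**
 * Iterator over the documents that have a value, backed by an on-disk bit set
 * of maxDoc bits; {@link #index()} exposes the ordinal of the current document
 * among the documents that have a value.
 */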
final class SparseDISI extends DocIdSetIterator {
static void writeBitSet(DocIdSetIterator it, int maxDoc, IndexOutput out) throws IOException {
int currentIndex = 0;
long currentBits = 0;
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
final int index = doc >>> 6;
if (index > currentIndex) {
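// the doc belongs to a later 64-bit word: flush the current word and zero-fill any skipped words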
out.writeLong(currentBits);
for (int i = currentIndex + 1; i < index; ++i) {
out.writeLong(0L);
}
currentIndex = index;
currentBits = 0L;
}
currentBits |= 1L << doc;
}
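// write the last word, then pad with empty words so the bit set covers all maxDoc bits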
out.writeLong(currentBits);
final int maxIndex = (maxDoc - 1) >>> 6;
for (int i = currentIndex + 1; i <= maxIndex; ++i) {
out.writeLong(0L);
}
}
final int maxDoc;
final int numWords;
final long cost;
final RandomAccessInput slice;
int doc = -1;
int wordIndex = -1;
long word;
int index = -1;
SparseDISI(int maxDoc, IndexInput in, long offset, long cost) throws IOException {
this.maxDoc = maxDoc;
this.numWords = (int) ((maxDoc + 63L) >>> 6);
this.slice = in.randomAccessSlice(offset, numWords * 8L);
this.cost = cost;
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
final int targetWordIndex = target >>> 6;
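// consume every word up to the one containing the target, counting set bits so that index() stays accurate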
for (int i = wordIndex + 1; i <= targetWordIndex; ++i) {
word = slice.readLong(i << 3);
index += Long.bitCount(word);
}
wordIndex = targetWordIndex;
long leftBits = word >>> target;
if (leftBits != 0L) {
return doc = target + Long.numberOfTrailingZeros(leftBits);
}
while (++wordIndex < numWords) {
word = slice.readLong(wordIndex << 3);
if (word != 0) {
index += Long.bitCount(word);
return doc = (wordIndex << 6) + Long.numberOfTrailingZeros(word);
}
}
return doc = NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int docID() {
return doc;
}
@Override
public long cost() {
return cost;
}
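/** Ordinal of the current document among the documents that have a value, i.e. its rank in the bit set. */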
public int index() {
return index - Long.bitCount(word >>> doc) + 1;
}
}

View File

@ -163,7 +163,7 @@
* all documents omit position data.
* </li>
* <li>
* {@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Normalization factors}.
* {@link org.apache.lucene.codecs.lucene70.Lucene70NormsFormat Normalization factors}.
* For each field in each document, a value is stored
* that is multiplied into the score for hits on that field.
* </li>
@ -278,12 +278,12 @@
* <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Norms}</td>
* <td>{@link org.apache.lucene.codecs.lucene70.Lucene70NormsFormat Norms}</td>
* <td>.nvd, .nvm</td>
* <td>Encodes length and boost factors for docs and fields</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-Document Values}</td>
* <td>{@link org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat Per-Document Values}</td>
* <td>.dvd, .dvm</td>
* <td>Encodes additional scoring factors or other per-document information.</td>
* </tr>

View File

@ -665,8 +665,17 @@ final class DefaultIndexingChain extends DocConsumer {
}
public void finish() throws IOException {
if (fieldInfo.omitsNorms() == false && invertState.length != 0) {
norms.addValue(docState.docID, similarity.computeNorm(invertState));
if (fieldInfo.omitsNorms() == false) {
long normValue;
if (invertState.length == 0) {
// the field exists in this document, but it did not have
// any indexed tokens, so we assign a default value of zero
// to the norm
normValue = 0;
} else {
normValue = similarity.computeNorm(invertState);
}
norms.addValue(docState.docID, normValue);
}
termsHashPerField.finish();

View File

@ -21,7 +21,10 @@ import java.io.IOException;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -29,29 +32,34 @@ import org.apache.lucene.util.packed.PackedLongValues;
* segment flushes. */
class NormValuesWriter {
private final static long MISSING = 0L;
private FixedBitSet docsWithField;
private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
private final FieldInfo fieldInfo;
private int lastDocID = -1;
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
docsWithField = new FixedBitSet(64);
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed();
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, long value) {
// Fill in any holes:
for (int i = (int)pending.size(); i < docID; ++i) {
pending.add(MISSING);
if (docID <= lastDocID) {
throw new IllegalArgumentException("Norm for \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
}
pending.add(value);
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
updateBytesUsed();
lastDocID = docID;
}
private void updateBytesUsed() {
@ -65,7 +73,6 @@ class NormValuesWriter {
public void flush(SegmentWriteState state, NormsConsumer normsConsumer) throws IOException {
final int maxDoc = state.segmentInfo.maxDoc();
final PackedLongValues values = pending.build();
normsConsumer.addNormsField(fieldInfo,
@ -75,7 +82,7 @@ class NormValuesWriter {
if (fieldInfo != NormValuesWriter.this.fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedNorms(maxDoc, values);
return new BufferedNorms(values, docsWithField);
}
@Override
@ -98,36 +105,28 @@ class NormValuesWriter {
// iterates over the values we have in ram
private static class BufferedNorms extends NumericDocValues {
final PackedLongValues.Iterator iter;
final int size;
final int maxDoc;
private int docID = -1;
final DocIdSetIterator docsWithField;
private long value;
BufferedNorms(int maxDoc, PackedLongValues values) {
this.maxDoc = maxDoc;
BufferedNorms(PackedLongValues values, FixedBitSet docsWithFields) {
this.iter = values.iterator();
this.size = (int) values.size();
}
@Override
public int docID() {
return docID;
this.docsWithField = new BitSetIterator(docsWithFields, values.size());
}
@Override
public int nextDoc() {
docID++;
if (docID == maxDoc) {
docID = NO_MORE_DOCS;
}
if (docID < size) {
public int docID() {
return docsWithField.docID();
}
@Override
public int nextDoc() throws IOException {
int docID = docsWithField.nextDoc();
if (docID != NO_MORE_DOCS) {
value = iter.next();
} else {
value = MISSING;
}
return docID;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
@ -135,7 +134,7 @@ class NormValuesWriter {
@Override
public long cost() {
return maxDoc;
return docsWithField.cost();
}
@Override

View File

@ -13,5 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.lucene62.Lucene62Codec
org.apache.lucene.codecs.lucene70.Lucene70Codec

View File

@ -13,5 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat
org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene70;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
/**
* Tests Lucene70NormsFormat
*/
public class TestLucene70NormsFormat extends BaseNormsFormatTestCase {
private final Codec codec = new Lucene70Codec();
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene70;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestSparseDISI extends LuceneTestCase {
public void testRandom() throws IOException {
try (Directory dir = newDirectory()) {
for (int i = 0; i < 1000; ++i) {
doTestRandom(dir);
}
}
}
private void doTestRandom(Directory dir) throws IOException {
List<Integer> docs = new ArrayList<>();
final int maxStep = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 2, 10));
final int numDocs = TestUtil.nextInt(random(), 1, 1000);
for (int doc = -1, i = 0; i < numDocs; ++i) {
doc += TestUtil.nextInt(random(), 1, maxStep);
docs.add(doc);
}
final int maxDoc = docs.get(docs.size() - 1) + TestUtil.nextInt(random(), 1, 100);
FixedBitSet set = new FixedBitSet(maxDoc);
for (int doc : docs) {
set.set(doc);
}
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
SparseDISI.writeBitSet(new BitSetIterator(set, docs.size()), maxDoc, out);
}
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
SparseDISI disi = new SparseDISI(maxDoc, in, 0L, docs.size());
BitSetIterator disi2 = new BitSetIterator(set, docs.size());
int i = 0;
for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) {
assertEquals(doc, disi.nextDoc());
assertEquals(i++, disi.index());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
}
for (int step : new int[] {1, 20, maxStep, maxStep * 10}) {
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
SparseDISI disi = new SparseDISI(maxDoc, in, 0L, docs.size());
BitSetIterator disi2 = new BitSetIterator(set, docs.size());
while (true) {
int target = disi2.docID() + step;
int doc = disi2.advance(target);
assertEquals(doc, disi.advance(target));
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int index = Collections.binarySearch(docs, doc);
assertEquals(index, disi.index());
}
}
}
dir.deleteFile("foo");
}
}

View File

@ -23,6 +23,7 @@ import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
@ -186,4 +187,24 @@ public class TestNorms extends LuceneTestCase {
throw new UnsupportedOperationException();
}
}
public void testEmptyValueVsNoValue() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig().setMergePolicy(newLogMergePolicy());
IndexWriter w = new IndexWriter(dir, cfg);
Document doc = new Document();
w.addDocument(doc);
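// doc 0 has no "foo" field at all; doc 1 below indexes an empty value, which still gets a norm (of 0)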
doc.add(newTextField("foo", "", Store.NO));
w.addDocument(doc);
w.forceMerge(1);
IndexReader reader = DirectoryReader.open(w);
w.close();
LeafReader leafReader = getOnlyLeafReader(reader);
NumericDocValues normValues = leafReader.getNormValues("foo");
assertNotNull(normValues);
assertEquals(1, normValues.nextDoc()); // doc 0 does not have norms
assertEquals(0, normValues.longValue());
reader.close();
dir.close();
}
}

View File

@ -30,9 +30,11 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@ -46,12 +48,17 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
* if there is some bug in a given NormsFormat that this
* test fails to catch then this test needs to be improved! */
public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCase {
/** Whether the codec supports sparse values. */
protected boolean codecSupportsSparsity() {
return true;
}
public void testByteRange() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
@ -59,12 +66,26 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparseByteRange() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
});
}
}
public void testShortRange() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(r, Short.MIN_VALUE, Short.MAX_VALUE);
@ -72,12 +93,26 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparseShortRange() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(r, Short.MIN_VALUE, Short.MAX_VALUE);
}
});
}
}
public void testLongRange() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(r, Long.MIN_VALUE, Long.MAX_VALUE);
@ -85,12 +120,26 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparseLongRange() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(r, Long.MIN_VALUE, Long.MAX_VALUE);
}
});
}
}
public void testFullLongRange() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
int thingToDo = r.nextInt(3);
@ -103,12 +152,31 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparseFullLongRange() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
int thingToDo = r.nextInt(3);
switch (thingToDo) {
case 0: return Long.MIN_VALUE;
case 1: return Long.MAX_VALUE;
default: return TestUtil.nextLong(r, Long.MIN_VALUE, Long.MAX_VALUE);
}
}
});
}
}
public void testFewValues() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextBoolean() ? 20 : 3;
@ -116,12 +184,26 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testFewSparseValues() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return r.nextBoolean() ? 20 : 3;
}
});
}
}
public void testFewLargeValues() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextBoolean() ? 1000000L : -5000;
@ -129,11 +211,25 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testFewSparseLargeValues() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return r.nextBoolean() ? 1000000L : -5000;
}
});
}
}
public void testAllZeros() throws Exception {
int iterations = atLeast(1);
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return 0;
@ -141,12 +237,25 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparse() throws Exception {
public void testSparseAllZeros() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return 0;
}
});
}
}
public void testMostZeros() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE) : 0;
@ -160,7 +269,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
final Random r = random();
for (int i = 0; i < iterations; i++) {
final long commonValue = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE) : commonValue;
@ -168,14 +277,29 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparseOutliers() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
final long commonValue = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE) : commonValue;
}
});
}
}
public void testOutliers2() throws Exception {
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
final long commonValue = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
final long uncommonValue = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? uncommonValue : commonValue;
@ -183,7 +307,23 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
});
}
}
public void testSparseOutliers2() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; i++) {
final long commonValue = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
final long uncommonValue = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? uncommonValue : commonValue;
}
});
}
}
public void testNCommon() throws Exception {
final Random r = random();
final int N = TestUtil.nextInt(r, 2, 15);
@ -196,14 +336,35 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
for (int j = 0; j < numOtherValues; ++j) {
otherValues[j] = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? otherValues[r.nextInt(numOtherValues - 1)] : commonValues[r.nextInt(N - 1)];
}
});
}
public void testSparseNCommon() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
final Random r = random();
final int N = TestUtil.nextInt(r, 2, 15);
final long[] commonValues = new long[N];
for (int j = 0; j < N; ++j) {
commonValues[j] = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
final int numOtherValues = TestUtil.nextInt(r, 2, 256 - N);
final long[] otherValues = new long[numOtherValues];
for (int j = 0; j < numOtherValues; ++j) {
otherValues[j] = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? otherValues[r.nextInt(numOtherValues - 1)] : commonValues[r.nextInt(N - 1)];
}
});
}
/**
* a more thorough n-common that tests all low bpv
*/
@ -224,7 +385,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
for (int j = 0; j < numOtherValues; ++j) {
otherValues[j] = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
doTestNormsVersusDocValues(new LongProducer() {
doTestNormsVersusDocValues(1, new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? otherValues[r.nextInt(numOtherValues - 1)] : commonValues[r.nextInt(N - 1)];
@ -233,17 +394,62 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
}
}
}
private void doTestNormsVersusDocValues(LongProducer longs) throws Exception {
/**
* a more thorough n-common that tests all low bpv and sparse docs
*/
@Nightly
public void testSparseNCommonBig() throws Exception {
assumeTrue("Requires sparse norms support", codecSupportsSparsity());
final int iterations = atLeast(1);
final Random r = random();
for (int i = 0; i < iterations; ++i) {
// 16 is 4 bpv, the max before we jump to 8bpv
for (int n = 2; n < 16; ++n) {
final int N = n;
final long[] commonValues = new long[N];
for (int j = 0; j < N; ++j) {
commonValues[j] = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
final int numOtherValues = TestUtil.nextInt(r, 2, 256 - N);
final long[] otherValues = new long[numOtherValues];
for (int j = 0; j < numOtherValues; ++j) {
otherValues[j] = TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
}
doTestNormsVersusDocValues(random().nextDouble(), new LongProducer() {
@Override
long next() {
return r.nextInt(100) == 0 ? otherValues[r.nextInt(numOtherValues - 1)] : commonValues[r.nextInt(N - 1)];
}
});
}
}
}
private void doTestNormsVersusDocValues(double density, LongProducer longs) throws Exception {
int numDocs = atLeast(500);
long norms[] = new long[numDocs];
for (int i = 0; i < numDocs; i++) {
final FixedBitSet docsWithField = new FixedBitSet(numDocs);
final int numDocsWithField = Math.max(1, (int) (density * numDocs));
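// randomly choose numDocsWithField distinct documents to carry a norm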
if (numDocsWithField == numDocs) {
docsWithField.set(0, numDocs);
} else {
int i = 0;
while (i < numDocsWithField) {
int doc = random().nextInt(numDocs);
if (docsWithField.get(doc) == false) {
docsWithField.set(doc);
++i;
}
}
}
long norms[] = new long[numDocsWithField];
for (int i = 0; i < numDocsWithField; i++) {
norms[i] = longs.next();
}
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(NoMergePolicy.INSTANCE);
conf.setSimilarity(new CannedNormSimilarity(norms));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
@ -254,12 +460,18 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
doc.add(indexedField);
doc.add(dvField);
for (int i = 0; i < numDocs; i++) {
for (int i = 0, j = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
long value = norms[i];
dvField.setLongValue(value);
indexedField.setStringValue(Long.toString(value));
writer.addDocument(doc);
if (docsWithField.get(i) == false) {
Document doc2 = new Document();
doc2.add(idField);
writer.addDocument(doc2);
} else {
long value = norms[j++];
dvField.setLongValue(value);
indexedField.setStringValue(Long.toString(value));
writer.addDocument(doc);
}
if (random().nextInt(31) == 0) {
writer.commit();
}
@ -280,12 +492,14 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
LeafReader r = context.reader();
NumericDocValues expected = r.getNumericDocValues("dv");
NumericDocValues actual = r.getNormValues("indexed");
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(i, expected.nextDoc());
assertEquals(i, actual.nextDoc());
assertEquals("doc " + i, expected.longValue(), actual.longValue());
assertEquals(expected == null, actual == null);
if (expected != null) {
for (int d = expected.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = expected.nextDoc()) {
assertEquals(d, actual.nextDoc());
assertEquals("doc " + d, expected.longValue(), actual.longValue());
}
assertEquals(NO_MORE_DOCS, actual.nextDoc());
}
assertEquals(NO_MORE_DOCS, expected.nextDoc());
}
ir.close();
@ -297,10 +511,13 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
LeafReader r = context.reader();
NumericDocValues expected = r.getNumericDocValues("dv");
NumericDocValues actual = r.getNormValues("indexed");
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(i, expected.nextDoc());
assertEquals(i, actual.nextDoc());
assertEquals("doc " + i, expected.longValue(), actual.longValue());
assertEquals(expected == null, actual == null);
if (expected != null) {
for (int d = expected.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = expected.nextDoc()) {
assertEquals(d, actual.nextDoc());
assertEquals("doc " + d, expected.longValue(), actual.longValue());
}
assertEquals(NO_MORE_DOCS, actual.nextDoc());
}
}
@ -403,9 +620,13 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
// be undead:
NumericDocValues norms = MultiDocValues.getNormValues(r, "content");
assertNotNull(norms);
for(int i=0;i<r.maxDoc();i++) {
assertEquals(i, norms.nextDoc());
assertEquals(0, norms.longValue());
if (codecSupportsSparsity()) {
assertEquals(DocIdSetIterator.NO_MORE_DOCS, norms.nextDoc());
} else {
for(int i=0;i<r.maxDoc();i++) {
assertEquals(i, norms.nextDoc());
assertEquals(0, norms.longValue());
}
}
r.close();