mirror of https://github.com/apache/lucene.git
LUCENE-5743: Add Lucene49NormsFormat
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1601606 13f79535-47bb-0310-9956-ffa450edef68
parent 84e1847228
commit 6384ae9fb7
@@ -129,6 +129,9 @@ New Features
   from Directory. Add Lucene49Codec and Lucene49DocValuesFormat that make
   use of these. (Robert Muir)
 
+* LUCENE-5743: Add Lucene49NormsFormat, which can compress in some cases
+  such as very short fields. (Ryan Ernst, Adrien Grand, Robert Muir)
+
 Changes in Backwards Compatibility Policy
 
 * LUCENE-5634: Add reuse argument to IndexableField.tokenStream. This
@@ -0,0 +1,32 @@
package org.apache.lucene.codecs.diskdv;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.index.BaseNormsFormatTestCase;

/** Tests DiskNormsFormat */
public class TestDiskNormsFormat extends BaseNormsFormatTestCase {
  private final Codec codec = new CheapBastardCodec();

  @Override
  protected Codec getCodec() {
    return codec;
  }
}
@@ -0,0 +1,31 @@
package org.apache.lucene.codecs.simpletext;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;

/** Tests SimpleTextNormsFormat */
public class TestSimpleTextNormsFormat extends BaseNormsFormatTestCase {
  private final Codec codec = new SimpleTextCodec();

  @Override
  protected Codec getCodec() {
    return codec;
  }
}
@@ -131,7 +131,7 @@ public class Lucene49Codec extends Codec {
   private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
   private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene49");
 
-  private final NormsFormat normsFormat = new Lucene42NormsFormat();
+  private final NormsFormat normsFormat = new Lucene49NormsFormat();
 
   @Override
   public final NormsFormat normsFormat() {
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene49;
 
 import java.io.Closeable; // javadocs
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -199,6 +200,7 @@ public class Lucene49DocValuesConsumer extends DocValuesConsumer implements Clos
           break;
         case TABLE_COMPRESSED:
           final Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
+          Arrays.sort(decode);
           final HashMap<Long,Integer> encode = new HashMap<>();
           meta.writeVInt(decode.length);
           for (int i = 0; i < decode.length; i++) {
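The Arrays.sort(decode) added above feeds the table-compression scheme shared with the new norms writer below: collect the distinct values, sort them into a small lookup table, and store each document's value as a bit-packed ordinal into that table. A minimal standalone sketch of that round trip in plain Java, with an int[] standing in for the PackedInts ordinals (names here are illustrative, not Lucene API):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;

// Sketch: table compression writes a sorted table of distinct values once,
// then one small ordinal per document instead of the full value.
public class TableCompressionSketch {
  public static void main(String[] args) {
    long[] values = {3, 20, 3, 3, 7, 20, 3};    // per-document values, few distinct

    TreeSet<Long> unique = new TreeSet<>();     // sorted set of distinct values
    for (long v : values) {
      unique.add(v);
    }
    long[] decode = new long[unique.size()];    // the on-disk lookup table
    int j = 0;
    for (long v : unique) {
      decode[j++] = v;
    }

    Map<Long,Integer> encode = new HashMap<>(); // inverse map: value -> ordinal
    for (int i = 0; i < decode.length; i++) {
      encode.put(decode[i], i);
    }

    int[] ords = new int[values.length];        // Lucene bit-packs these ordinals
    for (int i = 0; i < values.length; i++) {
      ords[i] = encode.get(values[i]);
    }

    // reading a document's value back is a single table lookup
    for (int docID = 0; docID < ords.length; docID++) {
      assert decode[ords[docID]] == values[docID];
    }
    System.out.println(Arrays.toString(ords));  // prints [0, 2, 0, 0, 1, 2, 0]
  }
}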
@@ -0,0 +1,208 @@
package org.apache.lucene.codecs.lucene49;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;

import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_CURRENT;

/**
 * Writer for {@link Lucene49NormsFormat}
 */
class Lucene49NormsConsumer extends DocValuesConsumer {
  static final byte DELTA_COMPRESSED = 0;
  static final byte TABLE_COMPRESSED = 1;
  static final byte CONST_COMPRESSED = 2;
  static final byte UNCOMPRESSED = 3;
  static final int BLOCK_SIZE = 16384;

  IndexOutput data, meta;
  final int maxDoc;

  Lucene49NormsConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
    maxDoc = state.segmentInfo.getDocCount();
    boolean success = false;
    try {
      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
      data = state.directory.createOutput(dataName, state.context);
      CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
      String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
      meta = state.directory.createOutput(metaName, state.context);
      CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this);
      }
    }
  }

  // we explicitly use only certain bits per value and a specified format, so we statically check this will work
  static {
    assert PackedInts.Format.PACKED_SINGLE_BLOCK.isSupported(1);
    assert PackedInts.Format.PACKED_SINGLE_BLOCK.isSupported(2);
    assert PackedInts.Format.PACKED_SINGLE_BLOCK.isSupported(4);
  }

  @Override
  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
    meta.writeVInt(field.number);
    long minValue = Long.MAX_VALUE;
    long maxValue = Long.MIN_VALUE;
    // TODO: more efficient?
    HashSet<Long> uniqueValues = new HashSet<>();

    long count = 0;
    for (Number nv : values) {
      if (nv == null) {
        throw new IllegalStateException("illegal norms data for field " + field.name + ", got null for value: " + count);
      }
      final long v = nv.longValue();

      minValue = Math.min(minValue, v);
      maxValue = Math.max(maxValue, v);

      if (uniqueValues != null) {
        if (uniqueValues.add(v)) {
          if (uniqueValues.size() > 256) {
            uniqueValues = null;
          }
        }
      }
      ++count;
    }

    if (count != maxDoc) {
      throw new IllegalStateException("illegal norms data for field " + field.name + ", expected " + maxDoc + " values, got " + count);
    }

    if (uniqueValues != null && uniqueValues.size() == 1) {
      // 0 bpv
      meta.writeByte(CONST_COMPRESSED);
      meta.writeLong(minValue);
    } else if (uniqueValues != null) {
      // small number of unique values: this is the typical case:
      // we only use bpv=1,2,4,8
      PackedInts.Format format = PackedInts.Format.PACKED_SINGLE_BLOCK;
      int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
      if (bitsPerValue == 3) {
        bitsPerValue = 4;
      } else if (bitsPerValue > 4) {
        bitsPerValue = 8;
      }

      if (bitsPerValue == 8 && minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
        meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
        meta.writeLong(data.getFilePointer());
        for (Number nv : values) {
          data.writeByte(nv == null ? 0 : (byte) nv.longValue());
        }
      } else {
        meta.writeByte(TABLE_COMPRESSED); // table-compressed
        meta.writeLong(data.getFilePointer());
        data.writeVInt(PackedInts.VERSION_CURRENT);

        Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
        Arrays.sort(decode);
        final HashMap<Long,Integer> encode = new HashMap<>();
        // upgrade to power of two sized array
        int size = 1 << bitsPerValue;
        data.writeVInt(size);
        for (int i = 0; i < decode.length; i++) {
          data.writeLong(decode[i]);
          encode.put(decode[i], i);
        }
        for (int i = decode.length; i < size; i++) {
          data.writeLong(0);
        }

        data.writeVInt(format.getId());
        data.writeVInt(bitsPerValue);

        final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, format, maxDoc, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
        for (Number nv : values) {
          writer.add(encode.get(nv.longValue()));
        }
        writer.finish();
      }
    } else {
      meta.writeByte(DELTA_COMPRESSED); // delta-compressed
      meta.writeLong(data.getFilePointer());
      data.writeVInt(PackedInts.VERSION_CURRENT);
      data.writeVInt(BLOCK_SIZE);

      final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
      for (Number nv : values) {
        writer.add(nv.longValue());
      }
      writer.finish();
    }
  }

  @Override
  public void close() throws IOException {
    boolean success = false;
    try {
      if (meta != null) {
        meta.writeVInt(-1); // write EOF marker
        CodecUtil.writeFooter(meta); // write checksum
      }
      if (data != null) {
        CodecUtil.writeFooter(data); // write checksum
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(data, meta);
      } else {
        IOUtils.closeWhileHandlingException(data, meta);
      }
      meta = data = null;
    }
  }

  @Override
  public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
    throw new UnsupportedOperationException();
  }
}
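To make the consumer's branching above easier to follow, here is the same per-field format decision distilled into one standalone function. This is an assumed-equivalent rewrite for illustration, not code from this commit; the returned bytes are the format constants defined above (DELTA_COMPRESSED=0, TABLE_COMPRESSED=1, CONST_COMPRESSED=2, UNCOMPRESSED=3):

// Sketch of Lucene49NormsConsumer's per-field format choice. numUnique is the
// number of distinct norm values seen (the consumer stops counting past 256).
static byte chooseFormat(int numUnique, long min, long max) {
  if (numUnique == 1) {
    return 2;                   // CONST_COMPRESSED: the value lives in the metadata
  }
  if (numUnique <= 256) {
    // PackedInts.bitsRequired(numUnique - 1), then round up to a width
    // PACKED_SINGLE_BLOCK supports: 1, 2, 4, or a full byte.
    int bpv = Math.max(1, 64 - Long.numberOfLeadingZeros(numUnique - 1));
    if (bpv == 3) {
      bpv = 4;
    } else if (bpv > 4) {
      bpv = 8;
    }
    if (bpv == 8 && min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
      return 3;                 // UNCOMPRESSED: 8-bit ordinals buy nothing over raw bytes
    }
    return 1;                   // TABLE_COMPRESSED: bit-packed ordinals into a sorted table
  }
  return 0;                     // DELTA_COMPRESSED: BlockPackedWriter over the raw values
}

Typical norms cluster into very few distinct values, so most fields land in the constant or table cases, which is presumably where the "very short fields" savings in the CHANGES entry come from.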
@@ -0,0 +1,121 @@
package org.apache.lucene.codecs.lucene49;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.SmallFloat;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;

/**
 * Lucene 4.9 Score normalization format.
 * <p>
 * Encodes normalization values with these strategies:
 * <p>
 * <ul>
 *    <li>Uncompressed: when values fit into a single byte and would require more than 4 bits
 *        per value, they are just encoded as an uncompressed byte array.
 *    <li>Constant: when there is only one value present for the entire field, no actual data
 *        is written: this constant is encoded in the metadata.
 *    <li>Table-compressed: when the number of unique values is very small (< 64), and
 *        when there are unused "gaps" in the range of values used (such as {@link SmallFloat}),
 *        a lookup table is written instead. Each per-document entry is instead the ordinal
 *        to this table, and those ordinals are compressed with bitpacking ({@link PackedInts}).
 *    <li>Delta-compressed: per-document integers written as deltas from the minimum value,
 *        compressed with bitpacking. For more information, see {@link BlockPackedWriter}.
 *        This is only used when norms larger than one byte are present.
 * </ul>
 * <p>
 * Files:
 * <ol>
 *   <li><tt>.nvd</tt>: Norms data</li>
 *   <li><tt>.nvm</tt>: Norms metadata</li>
 * </ol>
 * <ol>
 *   <li><a name="nvm" id="nvm"></a>
 *   <p>The Norms metadata or .nvm file.</p>
 *   <p>For each norms field, this stores metadata, such as the offset into the
 *      Norms data (.nvd)</p>
 *   <p>Norms metadata (.nvm) --> Header,<Entry><sup>NumFields</sup>,Footer</p>
 *   <ul>
 *     <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *     <li>Entry --> FieldNumber,Type,Offset</li>
 *     <li>FieldNumber --> {@link DataOutput#writeVInt vInt}</li>
 *     <li>Type --> {@link DataOutput#writeByte Byte}</li>
 *     <li>Offset --> {@link DataOutput#writeLong Int64}</li>
 *     <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 *   </ul>
 *   <p>FieldNumber of -1 indicates the end of metadata.</p>
 *   <p>Offset is the pointer to the start of the data in the norms data (.nvd), or the singleton value for Constant</p>
 *   <p>Type indicates how Numeric values will be compressed:
 *   <ul>
 *     <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
 *         from the minimum value within the block.
 *     <li>1 --> table-compressed. When the number of unique numeric values is small and it would save space,
 *         a lookup table of unique values is written, followed by the ordinal for each document.
 *     <li>2 --> constant. When there is a single value for the entire field.
 *     <li>3 --> uncompressed: Values written as a simple byte[].
 *   </ul>
 *   <li><a name="nvd" id="nvd"></a>
 *   <p>The Norms data or .nvd file.</p>
 *   <p>For each Norms field, this stores the actual per-document data (the heavy-lifting)</p>
 *   <p>Norms data (.nvd) --> Header,<Uncompressed | TableCompressed | DeltaCompressed><sup>NumFields</sup>,Footer</p>
 *   <ul>
 *     <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *     <li>Uncompressed --> {@link DataOutput#writeByte Byte}<sup>maxDoc</sup></li>
 *     <li>TableCompressed --> PackedIntsVersion,Table,BitPackedData</li>
 *     <li>Table --> TableSize, {@link DataOutput#writeLong int64}<sup>TableSize</sup></li>
 *     <li>BitpackedData --> {@link PackedInts}</li>
 *     <li>DeltaCompressed --> PackedIntsVersion,BlockSize,DeltaCompressedData</li>
 *     <li>DeltaCompressedData --> {@link BlockPackedWriter BlockPackedWriter(blockSize=16k)}</li>
 *     <li>PackedIntsVersion,BlockSize,TableSize --> {@link DataOutput#writeVInt vInt}</li>
 *     <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 *   </ul>
 * </ol>
 * @lucene.experimental
 */
public class Lucene49NormsFormat extends NormsFormat {

  /** Sole Constructor */
  public Lucene49NormsFormat() {}

  @Override
  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
    return new Lucene49NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
  }

  @Override
  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
    return new Lucene49NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
  }

  private static final String DATA_CODEC = "Lucene49NormsData";
  private static final String DATA_EXTENSION = "nvd";
  private static final String METADATA_CODEC = "Lucene49NormsMetadata";
  private static final String METADATA_EXTENSION = "nvm";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;
}
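The metadata layout documented above is small enough to mock end to end. A self-contained sketch of the .nvm entry stream follows (hypothetical class and helper names, codec header and footer omitted; Lucene's vInt is the usual 7-bits-per-byte varint, reproduced here by hand):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Sketch of the .nvm entry stream: per field, FieldNumber (vInt), Type (byte),
// Offset (int64); a FieldNumber vInt of -1 terminates the list.
public class NormsMetaSketch {
  // Lucene-style vInt: 7 data bits per byte, high bit set on continuation bytes.
  static void writeVInt(DataOutputStream out, int i) throws IOException {
    while ((i & ~0x7F) != 0) {
      out.writeByte((i & 0x7F) | 0x80);
      i >>>= 7;
    }
    out.writeByte(i);
  }

  static int readVInt(DataInputStream in) throws IOException {
    int b = in.readByte();
    int i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = in.readByte();
      i |= (b & 0x7F) << shift;
    }
    return i;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream meta = new DataOutputStream(bytes);
    writeVInt(meta, 5);        // FieldNumber
    meta.writeByte(2);         // Type: CONST_COMPRESSED
    meta.writeLong(42L);       // Offset doubles as the constant value here
    writeVInt(meta, -1);       // EOF marker, as in Lucene49NormsConsumer.close()

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    for (int field = readVInt(in); field != -1; field = readVInt(in)) {
      byte type = in.readByte();
      long offset = in.readLong();
      System.out.println("field=" + field + " type=" + type + " offset=" + offset);
    }
  }
}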
@@ -0,0 +1,233 @@
package org.apache.lucene.codecs.lucene49;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;

import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_START;
import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.CONST_COMPRESSED;
import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.DELTA_COMPRESSED;
import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.TABLE_COMPRESSED;
import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.UNCOMPRESSED;

/**
 * Reader for {@link Lucene49NormsFormat}
 */
class Lucene49NormsProducer extends DocValuesProducer {
  // metadata maps (just file pointers and minimal stuff)
  private final Map<Integer,NormsEntry> norms = new HashMap<>();
  private final IndexInput data;
  private final int version;

  // ram instances we have already loaded
  final Map<Integer,NumericDocValues> instances = new HashMap<>();

  private final int maxDoc;
  private final AtomicLong ramBytesUsed;

  Lucene49NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
    maxDoc = state.segmentInfo.getDocCount();
    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
    boolean success = false;
    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
    try {
      version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT);
      readFields(in, state.fieldInfos);
      CodecUtil.checkFooter(in);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }

    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    this.data = state.directory.openInput(dataName, state.context);
    success = false;
    try {
      final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
      if (version != version2) {
        throw new CorruptIndexException("Format versions mismatch");
      }

      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }

  private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
    int fieldNumber = meta.readVInt();
    while (fieldNumber != -1) {
      FieldInfo info = infos.fieldInfo(fieldNumber);
      if (info == null) {
        throw new CorruptIndexException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
      } else if (!info.hasNorms()) {
        throw new CorruptIndexException("Invalid field: " + info.name + " (resource=" + meta + ")");
      }
      NormsEntry entry = new NormsEntry();
      entry.format = meta.readByte();
      entry.offset = meta.readLong();
      switch (entry.format) {
        case CONST_COMPRESSED:
        case UNCOMPRESSED:
        case TABLE_COMPRESSED:
        case DELTA_COMPRESSED:
          break;
        default:
          throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
      }
      norms.put(fieldNumber, entry);
      fieldNumber = meta.readVInt();
    }
  }

  @Override
  public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
    NumericDocValues instance = instances.get(field.number);
    if (instance == null) {
      instance = loadNorms(field);
      instances.put(field.number, instance);
    }
    return instance;
  }

  @Override
  public long ramBytesUsed() {
    return ramBytesUsed.get();
  }

  @Override
  public void checkIntegrity() throws IOException {
    CodecUtil.checksumEntireFile(data);
  }

  private NumericDocValues loadNorms(FieldInfo field) throws IOException {
    NormsEntry entry = norms.get(field.number);
    switch (entry.format) {
      case CONST_COMPRESSED:
        final long v = entry.offset;
        return new NumericDocValues() {
          @Override
          public long get(int docID) {
            return v;
          }
        };
      case UNCOMPRESSED:
        data.seek(entry.offset);
        final byte bytes[] = new byte[maxDoc];
        data.readBytes(bytes, 0, bytes.length);
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(bytes));
        return new NumericDocValues() {
          @Override
          public long get(int docID) {
            return bytes[docID];
          }
        };
      case DELTA_COMPRESSED:
        data.seek(entry.offset);
        int packedIntsVersion = data.readVInt();
        int blockSize = data.readVInt();
        final BlockPackedReader reader = new BlockPackedReader(data, packedIntsVersion, blockSize, maxDoc, false);
        ramBytesUsed.addAndGet(reader.ramBytesUsed());
        return reader;
      case TABLE_COMPRESSED:
        data.seek(entry.offset);
        int packedVersion = data.readVInt();
        int size = data.readVInt();
        if (size > 256) {
          throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
        }
        final long decode[] = new long[size];
        for (int i = 0; i < decode.length; i++) {
          decode[i] = data.readLong();
        }
        final int formatID = data.readVInt();
        final int bitsPerValue = data.readVInt();
        final PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), packedVersion, maxDoc, bitsPerValue);
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
        return new NumericDocValues() {
          @Override
          public long get(int docID) {
            return decode[(int)ordsReader.get(docID)];
          }
        };
      default:
        throw new AssertionError();
    }
  }

  @Override
  public BinaryDocValues getBinary(FieldInfo field) throws IOException {
    throw new IllegalStateException();
  }

  @Override
  public SortedDocValues getSorted(FieldInfo field) throws IOException {
    throw new IllegalStateException();
  }

  @Override
  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    throw new IllegalStateException();
  }

  @Override
  public Bits getDocsWithField(FieldInfo field) throws IOException {
    throw new IllegalStateException();
  }

  @Override
  public void close() throws IOException {
    data.close();
  }

  static class NormsEntry {
    byte format;
    long offset;
  }
}
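Note the shape of getNumeric above: norms are decoded lazily, once per field, and cached in the instances map under the producer's lock; each returned NumericDocValues is just a closure over whatever loadNorms materialized (a single long for CONST_COMPRESSED, a byte[] for UNCOMPRESSED, a decode table plus packed ordinals for TABLE_COMPRESSED). A stripped-down sketch of that caching pattern, with a hypothetical loader standing in for loadNorms:

import java.util.HashMap;
import java.util.Map;

// Sketch of the lazy per-field cache used by Lucene49NormsProducer.getNumeric():
// the first request pays the decode cost, later requests hit the map.
class NormsCacheSketch {
  interface Norms { long get(int docID); }   // stands in for NumericDocValues

  private final Map<Integer,Norms> instances = new HashMap<>();

  synchronized Norms getNumeric(int fieldNumber) {
    Norms instance = instances.get(fieldNumber);
    if (instance == null) {
      instance = loadNorms(fieldNumber);     // hypothetical loader
      instances.put(fieldNumber, instance);
    }
    return instance;
  }

  private Norms loadNorms(int fieldNumber) {
    // CONST_COMPRESSED case: nothing is read from .nvd at all; the metadata
    // offset itself is the value, so the instance is a trivial closure.
    final long constant = 42L;               // would come from NormsEntry.offset
    return docID -> constant;
  }
}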
@@ -173,7 +173,7 @@ term occurs in each document. Note that this will not exist if all fields in
 all documents omit position data.
 </li>
 <li>
-{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}.
+{@link org.apache.lucene.codecs.lucene49.Lucene49NormsFormat Normalization factors}.
 For each field in each document, a value is stored
 that is multiplied into the score for hits on that field.
 </li>
@@ -289,7 +289,7 @@ systems that frequently run out of file handles.</td>
 <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
 </tr>
 <tr>
-<td>{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}</td>
+<td>{@link org.apache.lucene.codecs.lucene49.Lucene49NormsFormat Norms}</td>
 <td>.nvd, .nvm</td>
 <td>Encodes length and boost factors for docs and fields</td>
 </tr>
@@ -0,0 +1,38 @@
package org.apache.lucene.codecs.lucene40;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
import org.junit.BeforeClass;

/** Tests Lucene40's norms format */
public class TestLucene40NormsFormat extends BaseNormsFormatTestCase {
  final Codec codec = new Lucene40RWCodec();

  @Override
  protected Codec getCodec() {
    return codec;
  }

  @BeforeClass
  public static void beforeClass() {
    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
  }
}
@@ -0,0 +1,38 @@
package org.apache.lucene.codecs.lucene42;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
import org.junit.BeforeClass;

/** Tests Lucene42's norms format */
public class TestLucene42NormsFormat extends BaseNormsFormatTestCase {
  final Codec codec = new Lucene42RWCodec();

  @Override
  protected Codec getCodec() {
    return codec;
  }

  @BeforeClass
  public static void beforeClass() {
    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
  }
}
@@ -0,0 +1,33 @@
package org.apache.lucene.codecs.lucene49;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;

/**
 * Tests Lucene49NormsFormat
 */
public class TestLucene49NormsFormat extends BaseNormsFormatTestCase {
  final Codec codec = new Lucene49Codec();

  @Override
  protected Codec getCodec() {
    return codec;
  }
}
@@ -0,0 +1,30 @@
package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;

/** Tests the codec configuration defined by LuceneTestCase randomly
 */
public class TestNormsFormat extends BaseNormsFormatTestCase {

  @Override
  protected Codec getCodec() {
    return Codec.getDefault();
  }
}
@@ -0,0 +1,183 @@
package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;

/**
 * Abstract class to do basic tests for a norms format.
 * NOTE: This test focuses on the norms impl, nothing else.
 * The [stretch] goal is for this test to be
 * so thorough in testing a new NormsFormat that if this
 * test passes, then all Lucene/Solr tests should also pass.  I.e.,
 * if there is some bug in a given NormsFormat that this
 * test fails to catch then this test needs to be improved! */
public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCase {

  public void testByteRange() throws Exception {
    int iterations = atLeast(1);
    final Random r = random();
    for (int i = 0; i < iterations; i++) {
      doTestNormsVersusStoredFields(new LongProducer() {
        @Override
        long next() {
          return TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
        }
      });
    }
  }

  public void testLongRange() throws Exception {
    int iterations = atLeast(1);
    final Random r = random();
    for (int i = 0; i < iterations; i++) {
      doTestNormsVersusStoredFields(new LongProducer() {
        @Override
        long next() {
          return TestUtil.nextLong(r, Long.MIN_VALUE, Long.MAX_VALUE);
        }
      });
    }
  }

  public void testFewValues() throws Exception {
    int iterations = atLeast(1);
    final Random r = random();
    for (int i = 0; i < iterations; i++) {
      doTestNormsVersusStoredFields(new LongProducer() {
        @Override
        long next() {
          return r.nextBoolean() ? 20 : 3;
        }
      });
    }
  }

  public void testAllZeros() throws Exception {
    int iterations = atLeast(1);
    final Random r = random();
    for (int i = 0; i < iterations; i++) {
      doTestNormsVersusStoredFields(new LongProducer() {
        @Override
        long next() {
          return 0;
        }
      });
    }
  }

  private void doTestNormsVersusStoredFields(LongProducer longs) throws Exception {
    int numDocs = atLeast(500);
    long norms[] = new long[numDocs];
    for (int i = 0; i < numDocs; i++) {
      norms[i] = longs.next();
    }

    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    conf.setSimilarity(new CannedNormSimilarity(norms));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedField = newTextField("stored", "", Field.Store.YES);
    doc.add(idField);
    doc.add(storedField);

    for (int i = 0; i < numDocs; i++) {
      idField.setStringValue(Integer.toString(i));
      long value = norms[i];
      storedField.setStringValue(Long.toString(value));
      writer.addDocument(doc);
      if (random().nextInt(31) == 0) {
        writer.commit();
      }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs/10);
    for (int i = 0; i < numDeletions; i++) {
      int id = random().nextInt(numDocs);
      writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    writer.shutdown();

    // compare
    DirectoryReader ir = DirectoryReader.open(dir);
    for (AtomicReaderContext context : ir.leaves()) {
      AtomicReader r = context.reader();
      NumericDocValues docValues = r.getNormValues("stored");
      for (int i = 0; i < r.maxDoc(); i++) {
        long storedValue = Long.parseLong(r.document(i).get("stored"));
        assertEquals(storedValue, docValues.get(i));
      }
    }
    ir.close();
    dir.close();
  }

  static abstract class LongProducer {
    abstract long next();
  }

  static class CannedNormSimilarity extends Similarity {
    final long norms[];
    int index = 0;

    CannedNormSimilarity(long norms[]) {
      this.norms = norms;
    }

    @Override
    public long computeNorm(FieldInvertState state) {
      return norms[index++];
    }

    @Override
    public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();
    }

    @Override
    public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
      throw new UnsupportedOperationException();
    }
  }

  @Override
  protected void addRandomFields(Document doc) {
    // TODO: improve
    doc.add(new TextField("foobar", "boo", Field.Store.NO));
  }
}