diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java index 856e9bad901..7c2205aa3cf 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java @@ -40,12 +40,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts; /** - * Writes numbers one of two ways: - * 1. packed ints as deltas from minValue - * 2. packed ints as ordinals to a table (if the number of values is small, e.g. <= 256) - * - * the latter is typically much smaller with lucene's sims, as only some byte values are used, - * but its often a nonlinear mapping, especially if you dont use crazy boosts. + * Writer for {@link Lucene42DocValuesFormat} */ class Lucene42DocValuesConsumer extends DocValuesConsumer { static final int VERSION_START = 0; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java index 16f36e59511..590396635b1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java @@ -19,14 +19,92 @@ package org.apache.lucene.codecs.lucene42; import java.io.IOException; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; +import 
org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.BlockPackedWriter; +/** + * Lucene 4.2 DocValues format. + *

+ * Encodes the three per-document value types (Numeric, Binary, Sorted) with five basic strategies. + *

+ *

+ *

+ * Files: + *

    + *
  1. .dvd: DocValues data
  2. + *
  3. .dvm: DocValues metadata
  4. + *
+ *
    + *
  1. + *

    The DocValues metadata or .dvm file.

    + *

    For each DocValues field, this stores metadata, such as the offset into the + * DocValues data (.dvd)

    + *

    DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup>

    + * + *

    Sorted fields have two entries: a SortedEntry with the FST metadata, + * and an ordinary NumericEntry for the document-to-ord metadata.

    + *

    FieldNumber of -1 indicates the end of metadata.

    + *

    EntryType is 0 (NumericEntry), 1 (BinaryEntry), or 2 (SortedEntry)

    + *

    DataOffset is the pointer to the start of the data in the DocValues data (.dvd)

    + *

    CompressionType indicates how Numeric values will be compressed: + *

    + *

    MinLength and MaxLength represent the min and max byte[] value lengths for Binary values. + * If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length). + * Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize) + * is written for the addresses. + *

  2. + *

    The DocValues data or .dvd file.

    + *

    For each DocValues field, this stores the actual per-document data (the heavy-lifting)

    + *

    DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>

    + * + *
+ */ public class Lucene42DocValuesFormat extends DocValuesFormat { + /** Sole constructor */ public Lucene42DocValuesFormat() { super("Lucene42"); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java index 5ce20676958..6c4ce019a8a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java @@ -46,6 +46,9 @@ import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; import org.apache.lucene.util.packed.PackedInts; +/** + * Reader for {@link Lucene42DocValuesFormat} + */ class Lucene42DocValuesProducer extends DocValuesProducer { // metadata maps (just file pointers and minimal stuff) private final Map numerics; @@ -56,12 +59,8 @@ class Lucene42DocValuesProducer extends DocValuesProducer { // ram instances we have already loaded private final Map numericInstances = new HashMap(); - - // if this thing needs some TL state then we might put something - // else in this map. private final Map binaryInstances = new HashMap(); - private final Map> fstInstances = new HashMap>(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java index b932110a25d..9d1465bcd6f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java @@ -25,8 +25,24 @@ import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +/** + * Lucene 4.2 score normalization format. + *

+ * NOTE: this uses the same format as {@link Lucene42DocValuesFormat} + * Numeric DocValues, but with different file extensions. + *

+ * Files: + *

    + *
  • .nvd: DocValues data
  • + *
  • .nvm: DocValues metadata
  • + *
+ * @see Lucene42DocValuesFormat + */ public class Lucene42NormsFormat extends NormsFormat { + /** Sole constructor */ + public Lucene42NormsFormat() {} + @Override public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);