From 606205c642c6acf02c5ef143de0bededcf56b667 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 26 Apr 2012 16:09:08 +0000 Subject: [PATCH] LUCENE-2946: doc 4.0 stored fields format git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1330915 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene40/Lucene40StoredFieldsFormat.java | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java index a57ddd9ef60..240d16df225 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java @@ -25,10 +25,58 @@ import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.DataOutput; // javadocs import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -/** @lucene.experimental */ +/** + * Lucene 4.0 Stored Fields Format. + *

Stored fields are represented by two files:

+ *
    + *
  1. + *

    The field index, or .fdx file.

    + *

    This is used to find the location within the field data file of the fields + * of a particular document. Because it contains fixed-length data, this file may + * be easily randomly accessed. The position of document n's field data is + * the {@link DataOutput#writeLong Uint64} at n*8 in this file.

    + *

    This contains, for each document, a pointer to its field data, as + * follows:

    + *
      + *
    • FieldIndex (.fdx) --> <FieldValuesPosition>^SegSize
    • + *
    • FieldValuesPosition --> {@link DataOutput#writeLong Uint64}
    • + *
    + *
  2. + *
  3. + *

    The field data, or .fdt file.

    + *

    This contains the stored fields of each document, as follows:

    + *
      + *
    • FieldData (.fdt) --> <DocFieldData>^SegSize
    • + *
    • DocFieldData --> FieldCount, <FieldNum, Bits, Value> + * ^FieldCount
    • + *
    • FieldCount --> {@link DataOutput#writeVInt VInt}
    • + *
    • FieldNum --> {@link DataOutput#writeVInt VInt}
    • + *
    • Bits --> {@link DataOutput#writeByte Byte}
    • + *
        + *
      • low order bit reserved.
      • + *
      • second bit is one for fields containing binary data
      • + *
      • third bit reserved.
      • + *
      • 4th to 6th bits (mask: 0x7<<3) define the type of a numeric field: + *
          + *
        • all bits in mask are cleared if the field is not numeric
        • + *
        • 1<<3: Value is Int
        • + *
        • 2<<3: Value is Long
        • + *
        • 3<<3: Value is Int as Float (as per {@link Float#intBitsToFloat(int)})
        • + *
        • 4<<3: Value is Long as Double (as per {@link Double#longBitsToDouble(long)})
        • + *
        + *
      • + *
      + *
    • Value --> String | BinaryValue | Int | Long (depending on Bits)
    • + *
    • BinaryValue --> ValueSize, <{@link DataOutput#writeByte Byte}>^ValueSize
    • + *
    • ValueSize --> {@link DataOutput#writeVInt VInt}
    • + * + *
    + *
+ * @lucene.experimental */ public class Lucene40StoredFieldsFormat extends StoredFieldsFormat { @Override