LUCENE-2946: doc 4.0 stored fields format

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1330915 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-04-26 16:09:08 +00:00
parent fcfa443d3c
commit 606205c642
1 changed files with 49 additions and 1 deletions

View File

@ -25,10 +25,58 @@ import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/** @lucene.experimental */
/**
* Lucene 4.0 Stored Fields Format.
* <p>Stored fields are represented by two files:</p>
* <ol>
* <li><a name="field_index" id="field_index"></a>
* <p>The field index, or <tt>.fdx</tt> file.</p>
* <p>This is used to find the location within the field data file of the fields
* of a particular document. Because it contains fixed-length data, this file may
* be easily randomly accessed. The position of document <i>n</i> 's field data is
* the {@link DataOutput#writeLong Uint64} at <i>n*8</i> in this file.</p>
* <p>This contains, for each document, a pointer to its field data, as
* follows:</p>
* <ul>
* <li>FieldIndex (.fdx) --&gt; &lt;FieldValuesPosition&gt; <sup>SegSize</sup></li>
* <li>FieldValuesPosition --&gt; {@link DataOutput#writeLong Uint64}</li>
* </ul>
* </li>
* <li>
* <p><a name="field_data" id="field_data"></a>The field data, or <tt>.fdt</tt> file.</p>
* <p>This contains the stored fields of each document, as follows:</p>
* <ul>
* <li>FieldData (.fdt) --&gt; &lt;DocFieldData&gt; <sup>SegSize</sup></li>
* <li>DocFieldData --&gt; FieldCount, &lt;FieldNum, Bits, Value&gt;
* <sup>FieldCount</sup></li>
* <li>FieldCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldNum --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Bits --&gt; {@link DataOutput#writeByte Byte}</li>
* <ul>
* <li>low order bit reserved.</li>
* <li>second bit is one for fields containing binary data</li>
* <li>third bit reserved.</li>
* <li>4th to 6th bit (mask: 0x7&lt;&lt;3) define the type of a numeric field:
* <ul>
* <li>all bits in mask are cleared if no numeric field at all</li>
* <li>1&lt;&lt;3: Value is Int</li>
* <li>2&lt;&lt;3: Value is Long</li>
* <li>3&lt;&lt;3: Value is Int as Float (as of {@link Float#intBitsToFloat(int)}</li>
* <li>4&lt;&lt;3: Value is Long as Double (as of {@link Double#longBitsToDouble(long)}</li>
* </ul>
* </li>
* </ul>
* <li>Value --&gt; String | BinaryValue | Int | Long (depending on Bits)</li>
* <li>BinaryValue --&gt; ValueSize, &lt;{@link DataOutput#writeByte Byte}&gt;^ValueSize</li>
* <li>ValueSize --&gt; {@link DataOutput#writeVInt VInt}</li>
* </li>
* </ul>
* </ol>
* @lucene.experimental */
public class Lucene40StoredFieldsFormat extends StoredFieldsFormat {
@Override