LUCENE-3902, LUCENE-2946: javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1334871 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-05-07 04:16:19 +00:00
parent ea376c8725
commit dde2e650fe
29 changed files with 231 additions and 24 deletions

View File

@ -30,7 +30,8 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
* Implements the Lucene 4.0 index format, with configurable per-field postings formats.
*
*
* @see org.apache.lucene.codecs.lucene40 package documentation for file format details.
* @lucene.experimental
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever

View File

@ -31,7 +31,9 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/**
* Default PerDocConsumer implementation that uses compound file.
* Lucene 4.0 PerDocConsumer implementation that uses compound file.
*
* @see Lucene40DocValuesFormat
* @lucene.experimental
*/
public class Lucene40DocValuesConsumer extends DocValuesWriterBase {

View File

@ -23,10 +23,108 @@ import java.util.Set;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.index.DocValues; // javadocs
import org.apache.lucene.index.DocValues.Type; // javadocs
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.CompoundFileDirectory; // javadocs
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.util.CodecUtil; // javadocs
import org.apache.lucene.util.packed.PackedInts; // javadocs
/**
* Lucene 4.0 DocValues format.
* <p>
* Files:
* <ul>
* <li><tt>.dv.cfs</tt>: {@link CompoundFileDirectory compound container}</li>
* <li><tt>.dv.cfe</tt>: {@link CompoundFileDirectory compound entries}</li>
* </ul>
* Entries within the compound file:
* <ul>
* <li><tt>&lt;segment&gt;_&lt;fieldNumber&gt;.dat</tt>: data values</li>
* <li><tt>&lt;segment&gt;_&lt;fieldNumber&gt;.idx</tt>: index into the .dat for DEREF types</li>
* </ul>
* <p>
* There are several many types of {@link DocValues} with different encodings.
* From the perspective of filenames, all types store their values in <tt>.dat</tt>
* entries within the compound file. In the case of dereferenced/sorted types, the <tt>.dat</tt>
* actually contains only the unique values, and an additional <tt>.idx</tt> file contains
* pointers to these unique values.
* </p>
* <!-- TODO: review these and make sure everything is actually accurate -->
* Formats:
* <ul>
* <li>{@link Type#VAR_INTS VAR_INTS} .dat --&gt; Header, PackedType, MinValue,
* DefaultValue, PackedStream</li>
* <li>{@link Type#FIXED_INTS_8 FIXED_INTS_8} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
* <li>{@link Type#FIXED_INTS_16 FIXED_INTS_16} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeShort Short}<sup>maxdoc</sup></li>
* <li>{@link Type#FIXED_INTS_32 FIXED_INTS_32} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeInt Int32}<sup>maxdoc</sup></li>
* <li>{@link Type#FIXED_INTS_64 FIXED_INTS_64} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeLong Int64}<sup>maxdoc</sup></li>
* <li>{@link Type#FLOAT_32 FLOAT_32} .dat --&gt; Header, ValueSize,
* Float32<sup>maxdoc</sup></li>
* <li>{@link Type#FLOAT_64 FLOAT_64} .dat --&gt; Header, ValueSize,
* Float64<sup>maxdoc</sup></li>
* <li>{@link Type#BYTES_FIXED_STRAIGHT BYTES_FIXED_STRAIGHT} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>maxdoc</sup></li>
* <li>{@link Type#BYTES_VAR_STRAIGHT BYTES_VAR_STRAIGHT} .dat --&gt; Header, TotalBytes,
* Addresses, ({@link DataOutput#writeByte Byte} *
* <i>variable ValueSize</i>)<sup>maxdoc</sup></li>
* <li>{@link Type#BYTES_FIXED_DEREF BYTES_FIXED_DEREF} .idx --&gt; Header, NumValues,
* Addresses</li>
* <li>{@link Type#BYTES_FIXED_DEREF BYTES_FIXED_DEREF} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>NumValues</sup></li>
* <li>{@link Type#BYTES_VAR_DEREF BYTES_VAR_DEREF} .idx --&gt; Header, TotalVarBytes,
* Addresses</li>
* <li>{@link Type#BYTES_VAR_DEREF BYTES_VAR_DEREF} .dat --&gt; Header,
* (LengthPrefix + {@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>NumValues</sup></li>
* <li>{@link Type#BYTES_FIXED_SORTED BYTES_FIXED_SORTED} .idx --&gt; Header, NumValues,
* Ordinals</li>
* <li>{@link Type#BYTES_FIXED_SORTED BYTES_FIXED_SORTED} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>NumValues</sup></li>
* <li>{@link Type#BYTES_VAR_SORTED BYTES_VAR_SORTED} .idx --&gt; Header, TotalVarBytes,
* Addresses, Ordinals</li>
* <li>{@link Type#BYTES_VAR_SORTED BYTES_VAR_SORTED} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>NumValues</sup></li>
* </ul>
* Data Types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>MinValue, DefaultValue --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>PackedStream, Addresses, Ordinals --&gt; {@link PackedInts}</li>
* <li>ValueSize, NumValues --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>Float32 --&gt; 32-bit float encoded with {@link Float#floatToRawIntBits(float)}
* then written as {@link DataOutput#writeInt Int32}</li>
* <li>Float64 --&gt; 64-bit float encoded with {@link Double#doubleToRawLongBits(double)}
* then written as {@link DataOutput#writeLong Int64}</li>
* <li>TotalBytes --&gt; {@link DataOutput#writeVLong VLong}</li>
* <li>TotalVarBytes --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>LengthPrefix --&gt; Length of the data value as {@link DataOutput#writeVInt VInt} (maximum
* of 2 bytes)</li>
* </ul>
* Notes:
* <ul>
* <li>PackedType is a 0 when compressed, 1 when the stream is written as 64-bit integers.</li>
* <li>Addresses stores pointers to the actual byte location (indexed by docid). In the VAR_STRAIGHT
* case, each entry can have a different length, so to determine the length, docid+1 is
* retrieved. A sentinel address is written at the end for the VAR_STRAIGHT case, so the Addresses
* stream contains maxdoc+1 indices. For the deduplicated VAR_DEREF case, each length
* is encoded as a prefix to the data itself as a {@link DataOutput#writeVInt VInt}
* (maximum of 2 bytes).</li>
* <li>Ordinals stores the term ID in sorted order (indexed by docid). In the FIXED_SORTED case,
* the address into the .dat can be computed from the ordinal as
* <code>Header+ValueSize+(ordinal*ValueSize)</code> because the byte length is fixed.
* In the VAR_SORTED case, there is double indirection (docid -> ordinal -> address), but
* an additional sentinel ordinal+address is always written (so there are NumValues+1 ordinals). To
* determine the length, ord+1's address is looked up as well.</li>
* </ul>
*/
public class Lucene40DocValuesFormat extends DocValuesFormat {
@Override

View File

@ -38,7 +38,9 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;
/**
* Default PerDocProducer implementation that uses compound file.
* Lucene 4.0 PerDocProducer implementation that uses compound file.
*
* @see Lucene40DocValuesFormat
* @lucene.experimental
*/
public class Lucene40DocValuesProducer extends PerDocProducerBase {

View File

@ -35,7 +35,10 @@ import org.apache.lucene.store.IndexInput;
*/
/**
* Lucene 4.0 FieldInfos reader.
*
* @lucene.experimental
* @see Lucene40FieldInfosFormat
*/
public class Lucene40FieldInfosReader extends FieldInfosReader {

View File

@ -29,6 +29,9 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
/**
* Lucene 4.0 FieldInfos writer.
*
* @see Lucene40FieldInfosFormat
* @lucene.experimental
*/
public class Lucene40FieldInfosWriter extends FieldInfosWriter {

View File

@ -30,9 +30,20 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.CompoundFileDirectory; // javadocs
/**
* Norms Format for the default codec.
* Lucene 4.0 Norms Format.
* <p>
* Files:
* <ul>
* <li><tt>.nrm.cfs</tt>: {@link CompoundFileDirectory compound container}</li>
* <li><tt>.nrm.cfe</tt>: {@link CompoundFileDirectory compound entries}</li>
* </ul>
* Norms are implemented as DocValues, so other than file extension, norms are
* written exactly the same way as {@link Lucene40DocValuesFormat DocValues}.
*
* @see Lucene40DocValuesFormat
* @lucene.experimental
*/
public class Lucene40NormsFormat extends NormsFormat {
@ -53,6 +64,11 @@ public class Lucene40NormsFormat extends NormsFormat {
Lucene40NormsDocValuesConsumer.files(info, files);
}
/**
* Lucene 4.0 PerDocProducer implementation that uses compound file.
*
* @see Lucene40DocValuesFormat
*/
public static class Lucene40NormsDocValuesProducer extends Lucene40DocValuesProducer {
public Lucene40NormsDocValuesProducer(SegmentReadState state,
@ -77,6 +93,12 @@ public class Lucene40NormsFormat extends NormsFormat {
}
/**
* Lucene 4.0 PerDocConsumer implementation that uses compound file.
*
* @see Lucene40DocValuesFormat
* @lucene.experimental
*/
public static class Lucene40NormsDocValuesConsumer extends Lucene40DocValuesConsumer {
public Lucene40NormsDocValuesConsumer(PerDocWriteState state,

View File

@ -38,8 +38,11 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
/** Concrete class that reads the current doc/freq/skip
* postings format.
/**
* Concrete class that reads the 4.0 frq/prox
* postings format.
*
* @see Lucene40PostingsFormat
* @lucene.experimental */
public class Lucene40PostingsReader extends PostingsReaderBase {

View File

@ -38,7 +38,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/** @lucene.experimental */
/**
* Concrete class that writes the 4.0 frq/prx postings format.
*
* @see Lucene40PostingsFormat
* @lucene.experimental
*/
public final class Lucene40PostingsWriter extends PostingsWriterBase {
final static String CODEC = "Lucene40PostingsWriter";

View File

@ -30,7 +30,9 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Default implementation of {@link SegmentInfosReader}.
* Lucene 4.0 implementation of {@link SegmentInfosReader}.
*
* @see Lucene40SegmentInfosFormat
* @lucene.experimental
*/
public class Lucene40SegmentInfosReader extends SegmentInfosReader {

View File

@ -32,7 +32,9 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Default implementation of {@link SegmentInfosWriter}.
* Lucene 4.0 implementation of {@link SegmentInfosWriter}.
*
* @see Lucene40SegmentInfosFormat
* @lucene.experimental
*/
public class Lucene40SegmentInfosWriter extends SegmentInfosWriter {

View File

@ -24,8 +24,10 @@ import org.apache.lucene.codecs.MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput;
/**
* Implements the skip list reader for the default posting list format
* Implements the skip list reader for the 4.0 posting list format
* that stores positions and payloads.
*
* @see Lucene40PostingsFormat
* @lucene.experimental
*/
public class Lucene40SkipListReader extends MultiLevelSkipListReader {

View File

@ -25,8 +25,10 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
/**
* Implements the skip list writer for the default posting list format
* Implements the skip list writer for the 4.0 posting list format
* that stores positions and payloads.
*
* @see Lucene40PostingsFormat
* @lucene.experimental
*/
public class Lucene40SkipListWriter extends MultiLevelSkipListWriter {

View File

@ -40,6 +40,7 @@ import java.util.Set;
* <p/>
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
*
* @see Lucene40StoredFieldsFormat
* @lucene.internal
*/
public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {

View File

@ -36,7 +36,14 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/** @lucene.experimental */
/**
* Class responsible for writing stored document fields.
* <p/>
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
*
* @see Lucene40StoredFieldsFormat
* @lucene.experimental
*/
public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
// NOTE: bit 0 is free here! You can steal it!
static final int FIELD_IS_BINARY = 1 << 1;

View File

@ -45,6 +45,13 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.0 Term Vectors reader.
* <p>
* It reads .tvd, .tvf, and .tvx files.
*
* @see Lucene40TermVectorsFormat
*/
public class Lucene40TermVectorsReader extends TermVectorsReader {
// NOTE: if you make a new format, it must be larger than

View File

@ -47,6 +47,13 @@ import org.apache.lucene.util.StringHelper;
// file; saves a seek to tvd only to read a 0 vint (and
// saves a byte in tvd)
/**
* Lucene 4.0 Term Vectors writer.
* <p>
* It writes .tvd, .tvf, and .tvx files.
*
* @see Lucene40TermVectorsFormat
*/
public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
private final Directory directory;
private final String segment;

View File

@ -167,7 +167,8 @@ term occurs in each document. Note that this will not exist if all fields in
all documents omit position data.
</li>
<li>
Normalization factors. For each field in each document, a value is stored
{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Normalization factors}.
For each field in each document, a value is stored
that is multiplied into the score for hits on that field.
</li>
<li>
@ -178,7 +179,8 @@ term frequency. To add Term Vectors to your index see the
{@link org.apache.lucene.document.Field Field} constructors
</li>
<li>
Per-document values. Like stored values, these are also keyed by document
{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-document values}.
Like stored values, these are also keyed by document
number, but are generally intended to be loaded into main memory for fast
access. Whereas stored values are generally intended for summary results from
searches, per-document values are useful for things like scoring factors.
@ -270,12 +272,12 @@ systems that frequently run out of file handles.</td>
<td>Stores position information about where a term occurs in the index</td>
</tr>
<tr>
<td>Norms</td>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Norms}</td>
<td>.nrm.cfs, .nrm.cfe</td>
<td>Encodes length and boost factors for docs and fields</td>
</tr>
<tr>
<td>Per-Document Values</td>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-Document Values}</td>
<td>.dv.cfs, .dv.cfe</td>
<td>Encodes additional scoring factors or other per-document information.</td>
</tr>

View File

@ -608,7 +608,11 @@ public class PulsingPostingsReader extends PostingsReaderBase {
public Map<PulsingPostingsReader,DocsEnum> enums();
}
/** @lucene.internal */
/**
* Implementation of {@link PulsingEnumAttribute} for reuse of
* wrapped postings readers underneath pulsing.
*
* @lucene.internal */
public static final class PulsingEnumAttributeImpl extends AttributeImpl implements PulsingEnumAttribute {
// we could store 'other', but what if someone 'chained' multiple postings readers,
// this could cause problems?

View File

@ -548,7 +548,7 @@ public abstract class DocValues implements Closeable {
* pointer per document to dereference the shared byte[].
* Use this type if your documents may share the same byte[].
* <p>
* NOTE: Fields of this type will not store values for documents without and
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a documents value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. Custom default values must be assigned explicitly.
@ -575,7 +575,7 @@ public abstract class DocValues implements Closeable {
* {@link #BYTES_FIXED_DEREF}, but allowing each
* document's value to be a different length.
* <p>
* NOTE: Fields of this type will not store values for documents without and
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a documents value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. Custom default values must be assigned explicitly.
@ -589,7 +589,7 @@ public abstract class DocValues implements Closeable {
* {@link #BYTES_FIXED_SORTED}, but allowing each
* document's value to be a different length.
* <p>
* NOTE: Fields of this type will not store values for documents without and
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a documents value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference.Custom default values must be assigned explicitly.
@ -607,7 +607,7 @@ public abstract class DocValues implements Closeable {
* and allows access via document id, ordinal and by-value.
* Use this type if your documents may share the same byte[].
* <p>
* NOTE: Fields of this type will not store values for documents without and
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a documents value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. Custom default values must be assigned

View File

@ -16,7 +16,11 @@
*/
package org.apache.lucene.util.mutable;
/** @lucene.internal */
/**
* Base class for all mutable values.
*
* @lucene.internal
*/
public abstract class MutableValue implements Comparable<MutableValue> {
public boolean exists = true;

View File

@ -16,6 +16,10 @@
*/
package org.apache.lucene.util.mutable;
/**
* {@link MutableValue} implementation of type
* <code>boolean</code>.
*/
public class MutableValueBool extends MutableValue {
public boolean value;

View File

@ -18,6 +18,10 @@ package org.apache.lucene.util.mutable;
import java.util.Date;
/**
* {@link MutableValue} implementation of type
* {@link Date}.
*/
public class MutableValueDate extends MutableValueLong {
@Override
public Object toObject() {

View File

@ -16,6 +16,10 @@
*/
package org.apache.lucene.util.mutable;
/**
* {@link MutableValue} implementation of type
* <code>double</code>.
*/
public class MutableValueDouble extends MutableValue {
public double value;

View File

@ -16,6 +16,10 @@
*/
package org.apache.lucene.util.mutable;
/**
* {@link MutableValue} implementation of type
* <code>float</code>.
*/
public class MutableValueFloat extends MutableValue {
public float value;

View File

@ -16,6 +16,10 @@
*/
package org.apache.lucene.util.mutable;
/**
* {@link MutableValue} implementation of type
* <code>int</code>.
*/
public class MutableValueInt extends MutableValue {
public int value;

View File

@ -16,6 +16,10 @@
*/
package org.apache.lucene.util.mutable;
/**
* {@link MutableValue} implementation of type
* <code>long</code>.
*/
public class MutableValueLong extends MutableValue {
public long value;

View File

@ -18,6 +18,10 @@ package org.apache.lucene.util.mutable;
import org.apache.lucene.util.BytesRef;
/**
* {@link MutableValue} implementation of type
* {@link String}.
*/
public class MutableValueStr extends MutableValue {
public BytesRef value = new BytesRef();

View File

@ -152,11 +152,11 @@ index for all the files contained in a directory.</li>
queries and searches an index.</li>
</ul>
To demonstrate these, try something like:
<blockquote><tt>> <b>java -cp lucene.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles rec.food.recipes/soups</b></tt>
<blockquote><tt>> <b>java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles rec.food.recipes/soups</b></tt>
<br><tt>adding rec.food.recipes/soups/abalone-chowder</tt>
<br><tt>&nbsp; </tt>[ ... ]
<p><tt>> <b>java -cp lucene.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles</b></tt>
<p><tt>> <b>java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles</b></tt>
<br><tt>Query: <b>chowder</b></tt>
<br><tt>Searching for: chowder</tt>
<br><tt>34 total matching documents</tt>