LUCENE-5969: fix compile/javadocs, tighten up backwards codecs, add more safety to 5.x fields/vectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1628070 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-09-28 14:36:00 +00:00
parent a06c00350b
commit 24005cdcc5
88 changed files with 332 additions and 1545 deletions

View File: BitVector.java

@@ -31,19 +31,11 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.MutableBits;
/** Optimized implementation of a vector of bits. This is more-or-less like
* java.util.BitSet, but also includes the following:
* <ul>
* <li>a count() method, which efficiently computes the number of one bits;</li>
* <li>optimized read from and write to disk;</li>
* <li>inlinable get() method;</li>
* <li>store and load, as bit set or d-gaps, depending on sparseness;</li>
* </ul>
*
* @lucene.internal
/**
* Bitset for support of 4.x live documents
* @deprecated only for old 4.x segments
*/
// pkg-private: if this thing is generally useful then it can go back in .util,
// but the serialization must be here underneath the codec.
@Deprecated
final class BitVector implements Cloneable, MutableBits {
private byte[] bits;
@@ -52,7 +44,7 @@ final class BitVector implements Cloneable, MutableBits {
private int version;
/** Constructs a vector capable of holding <code>n</code> bits. */
public BitVector(int n) {
BitVector(int n) {
size = n;
bits = new byte[getNumBytes(size)];
count = 0;
@@ -90,27 +82,6 @@ final class BitVector implements Cloneable, MutableBits {
count = -1;
}
/** Sets the value of <code>bit</code> to true, and
* returns true if bit was already set */
public final boolean getAndSet(int bit) {
if (bit >= size) {
throw new ArrayIndexOutOfBoundsException("bit=" + bit + " size=" + size);
}
final int pos = bit >> 3;
final int v = bits[pos];
final int flag = 1 << (bit & 7);
if ((flag & v) != 0)
return true;
else {
bits[pos] = (byte) (v | flag);
if (count != -1) {
count++;
assert count <= size;
}
return false;
}
}
/** Sets the value of <code>bit</code> to zero. */
@Override
public final void clear(int bit) {
@@ -121,25 +92,6 @@ final class BitVector implements Cloneable, MutableBits {
count = -1;
}
public final boolean getAndClear(int bit) {
if (bit >= size) {
throw new ArrayIndexOutOfBoundsException(bit);
}
final int pos = bit >> 3;
final int v = bits[pos];
final int flag = 1 << (bit & 7);
if ((flag & v) == 0) {
return false;
} else {
bits[pos] &= ~flag;
if (count != -1) {
count--;
assert count >= 0;
}
return true;
}
}
/** Returns <code>true</code> if <code>bit</code> is one and
<code>false</code> if it is zero. */
@Override
@@ -150,7 +102,7 @@ final class BitVector implements Cloneable, MutableBits {
/** Returns the number of bits in this vector. This is also one greater than
the number of the largest valid bit number. */
public final int size() {
final int size() {
return size;
}
@@ -162,7 +114,7 @@ final class BitVector implements Cloneable, MutableBits {
/** Returns the total number of one bits in this vector. This is efficiently
computed and cached, so that, if the vector is not changed, no
recomputation is done for repeated calls. */
public final int count() {
final int count() {
// if the vector has been modified
if (count == -1) {
int c = 0;
@@ -177,7 +129,7 @@ final class BitVector implements Cloneable, MutableBits {
}
/** For testing */
public final int getRecomputedCount() {
final int getRecomputedCount() {
int c = 0;
int end = bits.length;
for (int i = 0; i < end; i++) {
@@ -191,29 +143,29 @@ final class BitVector implements Cloneable, MutableBits {
private static String CODEC = "BitVector";
// Version before version tracking was added:
public final static int VERSION_PRE = -1;
final static int VERSION_PRE = -1;
// First version:
public final static int VERSION_START = 0;
final static int VERSION_START = 0;
// Changed DGaps to encode gaps between cleared bits, not
// set:
public final static int VERSION_DGAPS_CLEARED = 1;
final static int VERSION_DGAPS_CLEARED = 1;
// added checksum
public final static int VERSION_CHECKSUM = 2;
final static int VERSION_CHECKSUM = 2;
// Increment version to change it:
public final static int VERSION_CURRENT = VERSION_CHECKSUM;
final static int VERSION_CURRENT = VERSION_CHECKSUM;
public int getVersion() {
int getVersion() {
return version;
}
/** Writes this vector to the file <code>name</code> in Directory
<code>d</code>, in a format that can be read by the constructor {@link
#BitVector(Directory, String, IOContext)}. */
public final void write(Directory d, String name, IOContext context) throws IOException {
final void write(Directory d, String name, IOContext context) throws IOException {
assert !(d instanceof CompoundFileDirectory);
try (IndexOutput output = d.createOutput(name, context)) {
output.writeInt(-2);
@@ -230,7 +182,7 @@ final class BitVector implements Cloneable, MutableBits {
}
/** Invert all bits */
public void invertAll() {
void invertAll() {
if (count != -1) {
count = size - count;
}
@@ -254,13 +206,6 @@ final class BitVector implements Cloneable, MutableBits {
}
}
/** Set all bits */
public void setAll() {
Arrays.fill(bits, (byte) 0xff);
clearUnusedBits();
count = size;
}
/** Write as a bit set */
private void writeBits(IndexOutput output) throws IOException {
output.writeInt(size()); // write size
@@ -325,7 +270,7 @@ final class BitVector implements Cloneable, MutableBits {
/** Constructs a bit vector from the file <code>name</code> in Directory
<code>d</code>, as written by the {@link #write} method.
*/
public BitVector(Directory d, String name, IOContext context) throws IOException {
BitVector(Directory d, String name, IOContext context) throws IOException {
try (ChecksumIndexInput input = d.openChecksumInput(name, context)) {
final int firstInt = input.readInt();

View File: Lucene40Codec.java

@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene40;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -30,17 +29,9 @@ import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
* Implements the Lucene 4.0 index format, with configurable per-field postings formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene40 package documentation for file format details.
* Reader for the 4.0 file format
* @deprecated Only for reading old 4.0 segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
@Deprecated
public class Lucene40Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat();

View File: Lucene40DocValuesFormat.java

@@ -19,119 +19,22 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts;
/**
* Lucene 4.0 DocValues format.
* <p>
* Files:
* <ul>
* <li><tt>.dv.cfs</tt>: {@link CompoundFileDirectory compound container}</li>
* <li><tt>.dv.cfe</tt>: {@link CompoundFileDirectory compound entries}</li>
* </ul>
* Entries within the compound file:
* <ul>
* <li><tt>&lt;segment&gt;_&lt;fieldNumber&gt;.dat</tt>: data values</li>
* <li><tt>&lt;segment&gt;_&lt;fieldNumber&gt;.idx</tt>: index into the .dat for DEREF types</li>
* </ul>
* <p>
* There are several types of {@code DocValues} with different encodings.
* From the perspective of filenames, all types store their values in <tt>.dat</tt>
* entries within the compound file. In the case of dereferenced/sorted types, the <tt>.dat</tt>
* actually contains only the unique values, and an additional <tt>.idx</tt> file contains
* pointers to these unique values.
* </p>
* Formats:
* <ul>
* <li>{@code VAR_INTS} .dat --&gt; Header, PackedType, MinValue,
* DefaultValue, PackedStream</li>
* <li>{@code FIXED_INTS_8} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
* <li>{@code FIXED_INTS_16} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeShort Short}<sup>maxdoc</sup></li>
* <li>{@code FIXED_INTS_32} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeInt Int32}<sup>maxdoc</sup></li>
* <li>{@code FIXED_INTS_64} .dat --&gt; Header, ValueSize,
* {@link DataOutput#writeLong Int64}<sup>maxdoc</sup></li>
* <li>{@code FLOAT_32} .dat --&gt; Header, ValueSize, Float32<sup>maxdoc</sup></li>
* <li>{@code FLOAT_64} .dat --&gt; Header, ValueSize, Float64<sup>maxdoc</sup></li>
* <li>{@code BYTES_FIXED_STRAIGHT} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>maxdoc</sup></li>
* <li>{@code BYTES_VAR_STRAIGHT} .idx --&gt; Header, TotalBytes, Addresses</li>
* <li>{@code BYTES_VAR_STRAIGHT} .dat --&gt; Header,
({@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>maxdoc</sup></li>
* <li>{@code BYTES_FIXED_DEREF} .idx --&gt; Header, NumValues, Addresses</li>
* <li>{@code BYTES_FIXED_DEREF} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>NumValues</sup></li>
* <li>{@code BYTES_VAR_DEREF} .idx --&gt; Header, TotalVarBytes, Addresses</li>
* <li>{@code BYTES_VAR_DEREF} .dat --&gt; Header,
* (LengthPrefix + {@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>NumValues</sup></li>
* <li>{@code BYTES_FIXED_SORTED} .idx --&gt; Header, NumValues, Ordinals</li>
* <li>{@code BYTES_FIXED_SORTED} .dat --&gt; Header, ValueSize,
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>NumValues</sup></li>
* <li>{@code BYTES_VAR_SORTED} .idx --&gt; Header, TotalVarBytes, Addresses, Ordinals</li>
* <li>{@code BYTES_VAR_SORTED} .dat --&gt; Header,
* ({@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>NumValues</sup></li>
* </ul>
* Data Types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>MaxAddress, MinValue, DefaultValue --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>PackedStream, Addresses, Ordinals --&gt; {@link PackedInts}</li>
* <li>ValueSize, NumValues --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>Float32 --&gt; 32-bit float encoded with {@link Float#floatToRawIntBits(float)}
* then written as {@link DataOutput#writeInt Int32}</li>
* <li>Float64 --&gt; 64-bit float encoded with {@link Double#doubleToRawLongBits(double)}
* then written as {@link DataOutput#writeLong Int64}</li>
* <li>TotalBytes --&gt; {@link DataOutput#writeVLong VLong}</li>
* <li>TotalVarBytes --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>LengthPrefix --&gt; Length of the data value as {@link DataOutput#writeVInt VInt} (maximum
* of 2 bytes)</li>
* </ul>
* Notes:
* <ul>
* <li>PackedType is 0 when compressed, 1 when the stream is written as 64-bit integers.</li>
* <li>Addresses stores pointers to the actual byte location (indexed by docid). In the VAR_STRAIGHT
* case, each entry can have a different length, so to determine the length, docid+1 is
* retrieved. A sentinel address is written at the end for the VAR_STRAIGHT case, so the Addresses
* stream contains maxdoc+1 indices. For the deduplicated VAR_DEREF case, each length
* is encoded as a prefix to the data itself as a {@link DataOutput#writeVInt VInt}
* (maximum of 2 bytes).</li>
* <li>Ordinals stores the term ID in sorted order (indexed by docid). In the FIXED_SORTED case,
* the address into the .dat can be computed from the ordinal as
* <code>Header+ValueSize+(ordinal*ValueSize)</code> because the byte length is fixed.
* In the VAR_SORTED case, there is double indirection (docid -&gt; ordinal -&gt; address), but
* an additional sentinel ordinal+address is always written (so there are NumValues+1 ordinals). To
* determine the length, ord+1's address is looked up as well.</li>
* <li>{@code BYTES_VAR_STRAIGHT} in contrast to other straight
* variants uses a <tt>.idx</tt> file to improve lookup performance. In contrast to
* {@code BYTES_VAR_DEREF} it doesn't apply deduplication of the document values.
* </li>
* </ul>
* <p>
* Limitations:
* <ul>
* <li> Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length.
* </ul>
* @deprecated Only for reading old 4.0 and 4.1 segments
*/
@Deprecated
// NOTE: not registered in SPI, doesnt respect segment suffix, etc
// for back compat only!
public class Lucene40DocValuesFormat extends DocValuesFormat {
/** Maximum length for each binary doc values field. */
public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
/** Sole constructor. */
public Lucene40DocValuesFormat() {
@ -144,7 +47,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
public final DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
"dv",
IndexFileNames.COMPOUND_FILE_EXTENSION);
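As a rough sketch of the VAR_STRAIGHT sentinel-address scheme described in the notes above (hypothetical names, not the actual Lucene40DocValuesReader): the Addresses stream holds maxdoc+1 entries, so a value's length never needs to be stored.

import java.util.Arrays;

// Sketch only: resolve a per-document byte slice via sentinel addresses.
class VarStraightSketch {
  /** addresses holds maxDoc+1 entries; the final sentinel is the total byte count. */
  static byte[] valueFor(int docID, long[] addresses, byte[] data) {
    int start = (int) addresses[docID];
    int end = (int) addresses[docID + 1]; // sentinel makes docID+1 always valid
    return Arrays.copyOfRange(data, start, end); // value length is end - start
  }
}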

View File: Lucene40DocValuesReader.java

@@ -48,7 +48,6 @@ import org.apache.lucene.util.packed.PackedInts;
/**
* Reads the 4.0 format of norms/docvalues
* @lucene.experimental
* @deprecated Only for reading old 4.0 and 4.1 segments
*/
@Deprecated

View File: Lucene40FieldInfosFormat.java

@@ -19,79 +19,12 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.store.DataOutput; // javadoc
/**
* Lucene 4.0 Field Infos format.
* <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
* <p>FieldInfos (.fnm) --&gt; Header,FieldsCount, &lt;FieldName,FieldNumber,
* FieldBits,DocValuesBits,Attributes&gt; <sup>FieldsCount</sup></p>
* <p>Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#checkHeader CodecHeader}</li>
* <li>FieldsCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldName --&gt; {@link DataOutput#writeString String}</li>
* <li>FieldBits, DocValuesBits --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt VInt}</li>
* <li>Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* </ul>
* </p>
* Field Descriptions:
* <ul>
* <li>FieldsCount: the number of fields in this file.</li>
* <li>FieldName: name of the field as a UTF-8 String.</li>
* <li>FieldNumber: the field's number. Note that unlike previous versions of
* Lucene, the fields are not numbered implicitly by their order in the
* file, instead explicitly.</li>
* <li>FieldBits: a byte containing field options.
* <ul>
* <li>The low-order bit is one for indexed fields, and zero for non-indexed
* fields.</li>
* <li>The second lowest-order bit is one for fields that have term vectors
* stored, and zero for fields without term vectors.</li>
* <li>If the third lowest order-bit is set (0x4), offsets are stored into
* the postings list in addition to positions.</li>
* <li>Fourth bit is unused.</li>
* <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the
* indexed field.</li>
* <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the
* indexed field.</li>
* <li>If the seventh lowest-order bit is set (0x40), term frequencies and
* positions omitted for the indexed field.</li>
* <li>If the eighth lowest-order bit is set (0x80), positions are omitted for the
* indexed field.</li>
* </ul>
* </li>
* <li>DocValuesBits: a byte containing per-document value types. The type
* recorded as two four-bit integers, with the high-order bits representing
* <code>norms</code> options, and the low-order bits representing
* {@code DocValues} options. Each four-bit integer can be decoded as such:
* <ul>
* <li>0: no DocValues for this field.</li>
* <li>1: variable-width signed integers. ({@code VAR_INTS})</li>
* <li>2: 32-bit floating point values. ({@code FLOAT_32})</li>
* <li>3: 64-bit floating point values. ({@code FLOAT_64})</li>
* <li>4: fixed-length byte array values. ({@code BYTES_FIXED_STRAIGHT})</li>
* <li>5: fixed-length dereferenced byte array values. ({@code BYTES_FIXED_DEREF})</li>
* <li>6: variable-length byte array values. ({@code BYTES_VAR_STRAIGHT})</li>
* <li>7: variable-length dereferenced byte array values. ({@code BYTES_VAR_DEREF})</li>
* <li>8: 16-bit signed integers. ({@code FIXED_INTS_16})</li>
* <li>9: 32-bit signed integers. ({@code FIXED_INTS_32})</li>
* <li>10: 64-bit signed integers. ({@code FIXED_INTS_64})</li>
* <li>11: 8-bit signed integers. ({@code FIXED_INTS_8})</li>
* <li>12: fixed-length sorted byte array values. ({@code BYTES_FIXED_SORTED})</li>
* <li>13: variable-length sorted byte array values. ({@code BYTES_VAR_SORTED})</li>
* </ul>
* </li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* </ul>
*
* @lucene.experimental
* @deprecated Only for reading old 4.0 and 4.1 segments
*/
@Deprecated
@@ -103,7 +36,7 @@ public class Lucene40FieldInfosFormat extends FieldInfosFormat {
}
@Override
public FieldInfosReader getFieldInfosReader() throws IOException {
public final FieldInfosReader getFieldInfosReader() throws IOException {
return reader;
}
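The FieldBits byte documented above decodes with simple mask tests. A sketch whose flag values mirror the documented bit positions (the helper class itself is hypothetical):

// Sketch only: decode the FieldBits byte per the layout documented above.
class FieldBitsSketch {
  static final byte IS_INDEXED = 0x1;                     // low-order bit
  static final byte STORE_TERMVECTOR = 0x2;               // second lowest-order bit
  static final byte STORE_OFFSETS_IN_POSTINGS = 0x4;      // third lowest-order bit
  static final byte OMIT_NORMS = 0x10;                    // fifth lowest-order bit
  static final byte STORE_PAYLOADS = 0x20;                // sixth lowest-order bit
  static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;  // seventh lowest-order bit
  static final byte OMIT_POSITIONS = (byte) 0x80;         // eighth lowest-order bit

  static boolean isIndexed(byte fieldBits)      { return (fieldBits & IS_INDEXED) != 0; }
  static boolean omitsNorms(byte fieldBits)     { return (fieldBits & OMIT_NORMS) != 0; }
  static boolean storesPayloads(byte fieldBits) { return (fieldBits & STORE_PAYLOADS) != 0; }
}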

View File: Lucene40FieldInfosReader.java

@@ -37,13 +37,10 @@ import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.0 FieldInfos reader.
*
* @lucene.experimental
* @see Lucene40FieldInfosFormat
* @deprecated Only for reading old 4.0 and 4.1 segments
*/
@Deprecated
class Lucene40FieldInfosReader extends FieldInfosReader {
final class Lucene40FieldInfosReader extends FieldInfosReader {
/** Sole constructor. */
public Lucene40FieldInfosReader() {

View File: Lucene40LiveDocsFormat.java

@@ -20,12 +20,10 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
@@ -33,37 +31,10 @@ import org.apache.lucene.util.MutableBits;
/**
* Lucene 4.0 Live Documents Format.
* <p>The .del file is optional, and only exists when a segment contains
* deletions.</p>
* <p>Although per-segment, this file is maintained exterior to compound segment
* files.</p>
* <p>Deletions (.del) --&gt; Format,Header,ByteCount,BitCount, Bits | DGaps (depending
* on Format)</p>
* <ul>
* <li>Format,ByteSize,BitCount --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>Bits --&gt; &lt;{@link DataOutput#writeByte Byte}&gt; <sup>ByteCount</sup></li>
* <li>DGaps --&gt; &lt;DGap,NonOnesByte&gt; <sup>NonzeroBytesCount</sup></li>
* <li>DGap --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>NonOnesByte --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* </ul>
* <p>Format is 1: indicates cleared DGaps.</p>
* <p>ByteCount indicates the number of bytes in Bits. It is typically
* (SegSize/8)+1.</p>
* <p>BitCount indicates the number of bits that are currently set in Bits.</p>
* <p>Bits contains one bit for each document indexed. When the bit corresponding
* to a document number is cleared, that document is marked as deleted. Bit ordering
* is from least to most significant. Thus, if Bits contains two bytes, 0x00 and
* 0x02, then document 9 is marked as alive (not deleted).</p>
* <p>DGaps represents sparse bit-vectors more efficiently than Bits. It is made
* of DGaps on indexes of nonOnes bytes in Bits, and the nonOnes bytes themselves.
* The number of nonOnes bytes in Bits (NonOnesBytesCount) is not stored.</p>
* <p>For example, if there are 8000 bits and only bits 10,12,32 are cleared, DGaps
* would be used:</p>
* <p>(VInt) 1 , (byte) 20 , (VInt) 3 , (Byte) 1</p>
* @deprecated Only for reading old 4.x segments
*/
public class Lucene40LiveDocsFormat extends LiveDocsFormat {
@Deprecated
public final class Lucene40LiveDocsFormat extends LiveDocsFormat {
/** Extension of deletes */
static final String DELETES_EXTENSION = "del";
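As a rough sketch of the DGaps representation described above (hypothetical helper, not the actual BitVector serialization): every byte of the bitset that is not 0xFF, i.e. contains at least one cleared (deleted) bit, is written as a VInt gap from the previous such byte's index followed by the byte itself.

import java.io.ByteArrayOutputStream;

// Sketch only: gap-encode a live-docs byte[] as <DGap, NonOnesByte> pairs.
class DGapsSketch {
  static byte[] encode(byte[] bits) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int last = 0;
    for (int i = 0; i < bits.length; i++) {
      if (bits[i] != (byte) 0xFF) { // this byte has at least one cleared bit
        writeVInt(out, i - last);   // DGap: delta from the previous stored index
        out.write(bits[i]);         // NonOnesByte: the byte itself
        last = i;
      }
    }
    return out.toByteArray();
  }

  static void writeVInt(ByteArrayOutputStream out, int v) {
    while ((v & ~0x7F) != 0) {
      out.write((v & 0x7F) | 0x80);
      v >>>= 7;
    }
    out.write(v);
  }
}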

View File: Lucene40NormsFormat.java

@@ -25,21 +25,9 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.CompoundFileDirectory;
/**
* Lucene 4.0 Norms Format.
* <p>
* Files:
* <ul>
* <li><tt>.nrm.cfs</tt>: {@link CompoundFileDirectory compound container}</li>
* <li><tt>.nrm.cfe</tt>: {@link CompoundFileDirectory compound entries}</li>
* </ul>
* Norms are implemented as DocValues, so other than file extension, norms are
* written exactly the same way as {@link Lucene40DocValuesFormat DocValues}.
*
* @see Lucene40DocValuesFormat
* @lucene.experimental
* @deprecated Only for reading old 4.0 and 4.1 segments
*/
@Deprecated

View File: Lucene40NormsReader.java

@@ -27,14 +27,13 @@ import org.apache.lucene.util.Accountable;
/**
* Reads 4.0/4.1 norms.
* Implemented the same as docvalues, but with a different filename.
* @deprecated Only for reading old 4.0 and 4.1 segments
*/
@Deprecated
class Lucene40NormsReader extends NormsProducer {
final class Lucene40NormsReader extends NormsProducer {
private final Lucene40DocValuesReader impl;
public Lucene40NormsReader(SegmentReadState state, String filename) throws IOException {
Lucene40NormsReader(SegmentReadState state, String filename) throws IOException {
impl = new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
}

View File: Lucene40PostingsBaseFormat.java

@@ -26,17 +26,13 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
/**
* Provides a {@link PostingsReaderBase} and {@link
* PostingsWriterBase}.
*
* PostingsReaderBase for 4.0 segments
* @deprecated Only for reading old 4.0 segments */
// TODO: should these also be named / looked up via SPI?
@Deprecated
public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
/** Sole constructor. */
public Lucene40PostingsBaseFormat() {
Lucene40PostingsBaseFormat() {
super("Lucene40");
}

View File: Lucene40PostingsFormat.java

@@ -19,226 +19,25 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase; // javadocs
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.FieldInfo.IndexOptions; // javadocs
import org.apache.lucene.index.FieldInfos; // javadocs
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.util.fst.FST; // javadocs
/**
* Lucene 4.0 Postings format.
* <p>
* Files:
* <ul>
* <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
* <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
* <li><tt>.frq</tt>: <a href="#Frequencies">Frequencies</a></li>
* <li><tt>.prx</tt>: <a href="#Positions">Positions</a></li>
* </ul>
* </p>
* <p>
* <a name="Termdictionary" id="Termdictionary"></a>
* <h3>Term Dictionary</h3>
*
* <p>The .tim file contains the list of terms in each
* field along with per-term statistics (such as docfreq)
* and pointers to the frequencies, positions and
* skip data in the .frq and .prx files.
* See {@link BlockTreeTermsWriter} for more details on the format.
* </p>
*
* <p>NOTE: The term dictionary can plug into different postings implementations:
* the postings writer/reader are actually responsible for encoding
* and decoding the Postings Metadata and Term Metadata sections described here:</p>
* <ul>
* <li>Postings Metadata --&gt; Header, SkipInterval, MaxSkipLevels, SkipMinimum</li>
* <li>Term Metadata --&gt; FreqDelta, SkipDelta?, ProxDelta?
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>SkipInterval,MaxSkipLevels,SkipMinimum --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>SkipDelta,FreqDelta,ProxDelta --&gt; {@link DataOutput#writeVLong VLong}</li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
* for the postings.</li>
* <li>SkipInterval is the fraction of TermDocs stored in skip tables. It is used to accelerate
* {@link DocsEnum#advance(int)}. Larger values result in smaller indexes, greater
* acceleration, but fewer accelerable cases, while smaller values result in bigger indexes,
* less acceleration (in case of a small value for MaxSkipLevels) and more accelerable cases.
* </li>
* <li>MaxSkipLevels is the max. number of skip levels stored for each term in the .frq file. A
* low value results in smaller indexes but less acceleration, a larger value results in
* slightly larger indexes but greater acceleration. See format of .frq file for more
* information about skip levels.</li>
* <li>SkipMinimum is the minimum document frequency a term must have in order to write any
* skip data at all.</li>
* <li>FreqDelta determines the position of this term's TermFreqs within the .frq
* file. In particular, it is the difference between the position of this term's
* data in that file and the position of the previous term's data (or zero, for
* the first term in the block).</li>
* <li>ProxDelta determines the position of this term's TermPositions within the
* .prx file. In particular, it is the difference between the position of this
* term's data in that file and the position of the previous term's data (or zero,
* for the first term in the block). For fields that omit position data, this will
* be 0 since prox information is not stored.</li>
* <li>SkipDelta determines the position of this term's SkipData within the .frq
* file. In particular, it is the number of bytes after TermFreqs that the
* SkipData starts. In other words, it is the length of the TermFreq data.
* SkipDelta is only stored if DocFreq is not smaller than SkipMinimum.</li>
* </ul>
* <a name="Termindex" id="Termindex"></a>
* <h3>Term Index</h3>
* <p>The .tip file contains an index into the term dictionary, so that it can be
* accessed randomly. See {@link BlockTreeTermsWriter} for more details on the format.</p>
* <a name="Frequencies" id="Frequencies"></a>
* <h3>Frequencies</h3>
* <p>The .frq file contains the lists of documents which contain each term, along
* with the frequency of the term in that document (except when frequencies are
* omitted: {@link IndexOptions#DOCS_ONLY}).</p>
* <ul>
* <li>FreqFile (.frq) --&gt; Header, &lt;TermFreqs, SkipData?&gt; <sup>TermCount</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermFreqs --&gt; &lt;TermFreq&gt; <sup>DocFreq</sup></li>
* <li>TermFreq --&gt; DocDelta[, Freq?]</li>
* <li>SkipData --&gt; &lt;&lt;SkipLevelLength, SkipLevel&gt;
* <sup>NumSkipLevels-1</sup>, SkipLevel&gt; &lt;SkipDatum&gt;</li>
* <li>SkipLevel --&gt; &lt;SkipDatum&gt; <sup>DocFreq/(SkipInterval^(Level +
* 1))</sup></li>
* <li>SkipDatum --&gt;
* DocSkip,PayloadLength?,OffsetLength?,FreqSkip,ProxSkip,SkipChildLevelPointer?</li>
* <li>DocDelta,Freq,DocSkip,PayloadLength,OffsetLength,FreqSkip,ProxSkip --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>SkipChildLevelPointer --&gt; {@link DataOutput#writeVLong VLong}</li>
* </ul>
* <p>TermFreqs are ordered by term (the term is implicit, from the term dictionary).</p>
* <p>TermFreq entries are ordered by increasing document number.</p>
* <p>DocDelta: if frequencies are indexed, this determines both the document
* number and the frequency. In particular, DocDelta/2 is the difference between
* this document number and the previous document number (or zero when this is the
* first document in a TermFreqs). When DocDelta is odd, the frequency is one.
* When DocDelta is even, the frequency is read as another VInt. If frequencies
* are omitted, DocDelta contains the gap (not multiplied by 2) between document
* numbers and no frequency information is stored.</p>
* <p>For example, the TermFreqs for a term which occurs once in document seven
* and three times in document eleven, with frequencies indexed, would be the
* following sequence of VInts:</p>
* <p>15, 8, 3</p>
* <p>If frequencies were omitted ({@link IndexOptions#DOCS_ONLY}) it would be this
* sequence of VInts instead:</p>
* <p>7,4</p>
* <p>DocSkip records the document number before every SkipInterval <sup>th</sup>
* document in TermFreqs. If payloads and offsets are disabled for the term's field, then
* DocSkip represents the difference from the previous value in the sequence. If
* payloads and/or offsets are enabled for the term's field, then DocSkip/2 represents the
* difference from the previous value in the sequence. In this case when
* DocSkip is odd, then PayloadLength and/or OffsetLength are stored indicating the length of
* the last payload/offset before the SkipInterval<sup>th</sup> document in TermPositions.</p>
* <p>PayloadLength indicates the length of the last payload.</p>
* <p>OffsetLength indicates the length of the last offset (endOffset-startOffset).</p>
* <p>
* FreqSkip and ProxSkip record the position of every SkipInterval <sup>th</sup>
* entry in FreqFile and ProxFile, respectively. File positions are relative to
* the start of TermFreqs and Positions, to the previous SkipDatum in the
* sequence.</p>
* <p>For example, if DocFreq=35 and SkipInterval=16, then there are two SkipData
* entries, containing the 15 <sup>th</sup> and 31 <sup>st</sup> document numbers
* in TermFreqs. The first FreqSkip names the number of bytes after the beginning
* of TermFreqs that the 16 <sup>th</sup> SkipDatum starts, and the second the
* number of bytes after that that the 32 <sup>nd</sup> starts. The first ProxSkip
* names the number of bytes after the beginning of Positions that the 16
* <sup>th</sup> SkipDatum starts, and the second the number of bytes after that
* that the 32 <sup>nd</sup> starts.</p>
* <p>Each term can have multiple skip levels. The amount of skip levels for a
* term is NumSkipLevels = Min(MaxSkipLevels,
* floor(log(DocFreq)/log(SkipInterval))). The number of SkipData entries for a
* skip level is DocFreq/(SkipInterval^(Level + 1)), whereas the lowest skip level
* is Level=0.<br>
* Example: SkipInterval = 4, MaxSkipLevels = 2, DocFreq = 35. Then skip level 0
* has 8 SkipData entries, containing the 3<sup>rd</sup>, 7<sup>th</sup>,
* 11<sup>th</sup>, 15<sup>th</sup>, 19<sup>th</sup>, 23<sup>rd</sup>,
* 27<sup>th</sup>, and 31<sup>st</sup> document numbers in TermFreqs. Skip level
* 1 has 2 SkipData entries, containing the 15<sup>th</sup> and 31<sup>st</sup>
* document numbers in TermFreqs.<br>
* The SkipData entries on all upper levels &gt; 0 contain a SkipChildLevelPointer
* referencing the corresponding SkipData entry in level-1. In the example,
* entry 15 on level 1 has a pointer to entry 15 on level 0, and entry 31 on
* level 1 has a pointer to entry 31 on level 0.
* </p>
* <a name="Positions" id="Positions"></a>
* <h3>Positions</h3>
* <p>The .prx file contains the lists of positions that each term occurs at
* within documents. Note that fields omitting positional data do not store
* anything into this file, and if all fields in the index omit positional data
* then the .prx file will not exist.</p>
* <ul>
* <li>ProxFile (.prx) --&gt; Header, &lt;TermPositions&gt; <sup>TermCount</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermPositions --&gt; &lt;Positions&gt; <sup>DocFreq</sup></li>
* <li>Positions --&gt; &lt;PositionDelta,PayloadLength?,OffsetDelta?,OffsetLength?,PayloadData?&gt; <sup>Freq</sup></li>
* <li>PositionDelta,OffsetDelta,OffsetLength,PayloadLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadData --&gt; {@link DataOutput#writeByte byte}<sup>PayloadLength</sup></li>
* </ul>
* <p>TermPositions are ordered by term (the term is implicit, from the term dictionary).</p>
* <p>Positions entries are ordered by increasing document number (the document
* number is implicit from the .frq file).</p>
* <p>PositionDelta is, if payloads are disabled for the term's field, the
* difference between the position of the current occurrence in the document and
* the previous occurrence (or zero, if this is the first occurrence in this
* document). If payloads are enabled for the term's field, then PositionDelta/2
* is the difference between the current and the previous position. If payloads
* are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
* the length of the payload at the current term position.</p>
* <p>For example, the TermPositions for a term which occurs as the fourth term in
* one document, and as the fifth and ninth term in a subsequent document, would
* be the following sequence of VInts (payloads disabled):</p>
* <p>4, 5, 4</p>
* <p>PayloadData is metadata associated with the current term position. If
* PayloadLength is stored at the current position, then it indicates the length
* of this payload. If PayloadLength is not stored, then this payload has the same
* length as the payload at the previous position.</p>
* <p>OffsetDelta/2 is the difference between this position's startOffset from the
* previous occurrence (or zero, if this is the first occurrence in this document).
* If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the
* previous occurrence and an OffsetLength follows. Offset data is only written for
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.</p>
*
* @deprecated Only for reading old 4.0 segments */
// TODO: this class could be created by wrapping
// BlockTreeTermsDict around Lucene40PostingsBaseFormat; ie
// we should not duplicate the code from that class here:
* @deprecated Only for reading old 4.0 segments
*/
@Deprecated
public class Lucene40PostingsFormat extends PostingsFormat {
/** minimum items (terms or sub-blocks) per block for BlockTree */
protected final int minBlockSize;
/** maximum items (terms or sub-blocks) per block for BlockTree */
protected final int maxBlockSize;
/** Creates {@code Lucene40PostingsFormat} with default
* settings. */
public Lucene40PostingsFormat() {
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
/** Creates {@code Lucene40PostingsFormat} with custom
* values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
* @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
private Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) {
super("Lucene40");
this.minBlockSize = minBlockSize;
assert minBlockSize > 1;
this.maxBlockSize = maxBlockSize;
}
@Override
@@ -247,7 +46,7 @@ public class Lucene40PostingsFormat extends PostingsFormat {
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postings = new Lucene40PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
boolean success = false;
@@ -276,6 +75,6 @@ public class Lucene40PostingsFormat extends PostingsFormat {
@Override
public String toString() {
return getName() + "(minBlockSize=" + minBlockSize + " maxBlockSize=" + maxBlockSize + ")";
return getName();
}
}
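The DocDelta encoding described above can be checked with a small sketch (illustrative only, not the actual Lucene40PostingsWriter): the delta is shifted left one bit, the low bit set means the frequency is one, otherwise the frequency follows as its own VInt.

import java.util.ArrayList;
import java.util.List;

// Sketch only: interleave doc deltas and frequencies as described above.
class FreqEncodingSketch {
  static List<Integer> encode(int[] docs, int[] freqs) {
    List<Integer> vints = new ArrayList<>();
    int lastDoc = 0;
    for (int i = 0; i < docs.length; i++) {
      int delta = docs[i] - lastDoc;
      if (freqs[i] == 1) {
        vints.add((delta << 1) | 1); // odd: frequency is one
      } else {
        vints.add(delta << 1);       // even: frequency follows as another VInt
        vints.add(freqs[i]);
      }
      lastDoc = docs[i];
    }
    return vints;
  }
}

Here encode(new int[] {7, 11}, new int[] {1, 3}) yields [15, 8, 3], the exact VInt sequence worked through in the javadoc above.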

View File: Lucene40PostingsReader.java

@@ -43,13 +43,10 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
/**
* Concrete class that reads the 4.0 frq/prox
* postings format.
*
* @see Lucene40PostingsFormat
* @deprecated Only for reading old 4.0 segments */
* Reader for 4.0 postings format
* @deprecated Only for reading old 4.0 segments */
@Deprecated
public class Lucene40PostingsReader extends PostingsReaderBase {
final class Lucene40PostingsReader extends PostingsReaderBase {
final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";

View File: Lucene40SegmentInfoFormat.java

@@ -17,57 +17,14 @@ package org.apache.lucene.codecs.lucene40;
* limitations under the License.
*/
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.SegmentInfo; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.index.SegmentInfo;
/**
* Lucene 4.0 Segment info format.
* <p>
* Files:
* <ul>
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Attributes, Files
* </ul>
* </p>
* Data types:
* <p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>SegSize --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>SegVersion --&gt; {@link DataOutput#writeString String}</li>
* <li>Files --&gt; {@link DataOutput#writeStringSet Set&lt;String&gt;}</li>
* <li>Diagnostics, Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
* </ul>
* </p>
* Field Descriptions:
* <p>
* <ul>
* <li>SegVersion is the code version that created the segment.</li>
* <li>SegSize is the number of documents contained in the segment index.</li>
* <li>IsCompoundFile records whether the segment is written as a compound file or
* not. If this is -1, the segment is not a compound file. If it is 1, the segment
* is a compound file.</li>
* <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
* until the checksum. This is used to verify integrity of the file on opening the
* index.</li>
* <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
* for each segment it creates. It includes metadata like the current Lucene
* version, OS, Java version, why the segment was created (merge, flush,
* addIndexes), etc.</li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* <li>Files is a list of files referred to by this segment.</li>
* </ul>
* </p>
*
* @see SegmentInfos
* @lucene.experimental
* @deprecated Only for reading old 4.0-4.5 segments, and supporting IndexWriter.addIndexes
* @deprecated Only for reading old 4.0-4.5 segments
*/
@Deprecated
public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
@@ -78,7 +35,7 @@ public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
}
@Override
public SegmentInfoReader getSegmentInfoReader() {
public final SegmentInfoReader getSegmentInfoReader() {
return reader;
}
@@ -88,7 +45,7 @@ public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
}
/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene40SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
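A tiny sketch of the IsCompoundFile convention described above; the -1/1 values match the documented encoding, while the helper itself is hypothetical:

// Sketch only: interpret the IsCompoundFile byte as documented above.
class IsCompoundFileSketch {
  static final byte NO = -1; // segment is not a compound file
  static final byte YES = 1; // segment is a compound file

  static boolean isCompoundFile(byte b) {
    if (b == YES) return true;
    if (b == NO) return false;
    throw new IllegalArgumentException("invalid IsCompoundFile byte: " + b);
  }
}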

View File: Lucene40SegmentInfoReader.java

@@ -34,14 +34,11 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
* Lucene 4.0 implementation of {@link SegmentInfoReader}.
*
* @see Lucene40SegmentInfoFormat
* @lucene.experimental
* Lucene 4.0 SI reader
* @deprecated Only for reading old 4.0-4.5 segments
*/
@Deprecated
public class Lucene40SegmentInfoReader extends SegmentInfoReader {
final class Lucene40SegmentInfoReader extends SegmentInfoReader {
/** Sole constructor. */
public Lucene40SegmentInfoReader() {

View File: Lucene40SkipListReader.java

@@ -24,14 +24,11 @@ import org.apache.lucene.codecs.MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput;
/**
* Implements the skip list reader for the 4.0 posting list format
* that stores positions and payloads.
*
* @see Lucene40PostingsFormat
* Lucene 4.0 skiplist reader
* @deprecated Only for reading old 4.0 segments
*/
@Deprecated
public class Lucene40SkipListReader extends MultiLevelSkipListReader {
final class Lucene40SkipListReader extends MultiLevelSkipListReader {
private boolean currentFieldStoresPayloads;
private boolean currentFieldStoresOffsets;
private long freqPointer[];

View File: Lucene40StoredFieldsFormat.java

@@ -19,66 +19,18 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Lucene 4.0 Stored Fields Format.
* <p>Stored fields are represented by two files:</p>
* <ol>
* <li><a name="field_index" id="field_index"></a>
* <p>The field index, or <tt>.fdx</tt> file.</p>
* <p>This is used to find the location within the field data file of the fields
* of a particular document. Because it contains fixed-length data, this file may
* be easily randomly accessed. The position of document <i>n</i>'s field data is
* the {@link DataOutput#writeLong Uint64} at <i>n*8</i> in this file.</p>
* <p>This contains, for each document, a pointer to its field data, as
* follows:</p>
* <ul>
* <li>FieldIndex (.fdx) --&gt; &lt;Header&gt;, &lt;FieldValuesPosition&gt; <sup>SegSize</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>FieldValuesPosition --&gt; {@link DataOutput#writeLong Uint64}</li>
* </ul>
* </li>
* <li>
* <p><a name="field_data" id="field_data"></a>The field data, or <tt>.fdt</tt> file.</p>
* <p>This contains the stored fields of each document, as follows:</p>
* <ul>
* <li>FieldData (.fdt) --&gt; &lt;Header&gt;, &lt;DocFieldData&gt; <sup>SegSize</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DocFieldData --&gt; FieldCount, &lt;FieldNum, Bits, Value&gt;
* <sup>FieldCount</sup></li>
* <li>FieldCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldNum --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Bits --&gt; {@link DataOutput#writeByte Byte}</li>
* <ul>
* <li>low order bit reserved.</li>
* <li>second bit is one for fields containing binary data</li>
* <li>third bit reserved.</li>
* <li>4th to 6th bit (mask: 0x7&lt;&lt;3) define the type of a numeric field:
* <ul>
* <li>all bits in mask are cleared if no numeric field at all</li>
* <li>1&lt;&lt;3: Value is Int</li>
* <li>2&lt;&lt;3: Value is Long</li>
* <li>3&lt;&lt;3: Value is Int as Float (as of {@link Float#intBitsToFloat(int)})</li>
* <li>4&lt;&lt;3: Value is Long as Double (as of {@link Double#longBitsToDouble(long)})</li>
* </ul>
* </li>
* </ul>
* <li>Value --&gt; String | BinaryValue | Int | Long (depending on Bits)</li>
* <li>BinaryValue --&gt; ValueSize, &lt;{@link DataOutput#writeByte Byte}&gt;^ValueSize</li>
* <li>ValueSize --&gt; {@link DataOutput#writeVInt VInt}</li>
* </li>
* </ul>
* </ol>
* @lucene.experimental */
* @deprecated only for reading 4.0 segments */
@Deprecated
public class Lucene40StoredFieldsFormat extends StoredFieldsFormat {
/** Sole constructor. */
@@ -86,7 +38,7 @@ public class Lucene40StoredFieldsFormat extends StoredFieldsFormat {
}
@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
public final StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
FieldInfos fn, IOContext context) throws IOException {
return new Lucene40StoredFieldsReader(directory, si, fn, context);
}
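Because each .fdx entry is a fixed-width Uint64, the lookup described above is one seek and one read. A minimal sketch, assuming a headerLength computed elsewhere and plain RandomAccessFile I/O rather than Lucene's Directory abstraction:

import java.io.IOException;
import java.io.RandomAccessFile;

// Sketch only: the fixed-width .fdx lookup described above.
class FdxLookupSketch {
  static long fieldDataPointer(RandomAccessFile fdx, long headerLength, int docID)
      throws IOException {
    fdx.seek(headerLength + 8L * docID); // document n's pointer is at n*8 past the header
    return fdx.readLong();               // Uint64 offset into the .fdt data file
  }
}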

View File: Lucene40StoredFieldsReader.java

@@ -40,14 +40,11 @@ import java.nio.charset.StandardCharsets;
import java.util.Collections;
/**
* Class responsible for access to stored document fields.
* <p/>
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
*
* @see Lucene40StoredFieldsFormat
* @lucene.internal
* Reader for 4.0 stored fields
* @deprecated only for reading 4.0 segments
*/
public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
@Deprecated
final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
// NOTE: bit 0 is free here! You can steal it!
static final int FIELD_IS_BINARY = 1 << 1;
@@ -76,10 +73,10 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";
static final String FIELDS_EXTENSION = "fdt";
/** Extension of stored fields index file */
public static final String FIELDS_INDEX_EXTENSION = "fdx";
static final String FIELDS_INDEX_EXTENSION = "fdx";
private static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Lucene40StoredFieldsReader.class);

View File: Lucene40TermVectorsFormat.java

@@ -19,100 +19,19 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Lucene 4.0 Term Vectors format.
* <p>Term Vector support is optional, on a field-by-field basis. It consists of
* 3 files.</p>
* <ol>
* <li><a name="tvx" id="tvx"></a>
* <p>The Document Index or .tvx file.</p>
* <p>For each document, this stores the offset into the document data (.tvd) and
* field data (.tvf) files.</p>
* <p>DocumentIndex (.tvx) --&gt; Header,&lt;DocumentPosition,FieldPosition&gt;
* <sup>NumDocs</sup></p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DocumentPosition --&gt; {@link DataOutput#writeLong UInt64} (offset in the .tvd file)</li>
* <li>FieldPosition --&gt; {@link DataOutput#writeLong UInt64} (offset in the .tvf file)</li>
* </ul>
* </li>
* <li><a name="tvd" id="tvd"></a>
* <p>The Document or .tvd file.</p>
* <p>This contains, for each document, the number of fields, a list of the fields
* with term vector info and finally a list of pointers to the field information
* in the .tvf (Term Vector Fields) file.</p>
* <p>The .tvd file is used to map out the fields that have term vectors stored
* and where the field information is in the .tvf file.</p>
* <p>Document (.tvd) --&gt; Header,&lt;NumFields, FieldNums,
* FieldPositions&gt; <sup>NumDocs</sup></p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>NumFields --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldNums --&gt; &lt;FieldNumDelta&gt; <sup>NumFields</sup></li>
* <li>FieldNumDelta --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldPositions --&gt; &lt;FieldPositionDelta&gt; <sup>NumFields-1</sup></li>
* <li>FieldPositionDelta --&gt; {@link DataOutput#writeVLong VLong}</li>
* </ul>
* </li>
* <li><a name="tvf" id="tvf"></a>
* <p>The Field or .tvf file.</p>
* <p>This file contains, for each field that has a term vector stored, a list of
* the terms, their frequencies and, optionally, position, offset, and payload
* information.</p>
* <p>Field (.tvf) --&gt; Header,&lt;NumTerms, Flags, TermFreqs&gt;
* <sup>NumFields</sup></p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>NumTerms --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Flags --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>TermFreqs --&gt; &lt;TermText, TermFreq, Positions?, PayloadData?, Offsets?&gt;
* <sup>NumTerms</sup></li>
* <li>TermText --&gt; &lt;PrefixLength, Suffix&gt;</li>
* <li>PrefixLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Suffix --&gt; {@link DataOutput#writeString String}</li>
* <li>TermFreq --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Positions --&gt; &lt;PositionDelta PayloadLength?&gt;<sup>TermFreq</sup></li>
* <li>PositionDelta --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadData --&gt; {@link DataOutput#writeByte Byte}<sup>NumPayloadBytes</sup></li>
* <li>Offsets --&gt; &lt;{@link DataOutput#writeVInt VInt}, {@link DataOutput#writeVInt VInt}&gt;<sup>TermFreq</sup></li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>Flags byte stores whether this term vector has position, offset, and payload
* information stored.</li>
* <li>Term byte prefixes are shared. The PrefixLength is the number of initial
* bytes from the previous term which must be pre-pended to a term's suffix
* in order to form the term's bytes. Thus, if the previous term's text was "bone"
* and the term is "boy", the PrefixLength is two and the suffix is "y".</li>
* <li>PositionDelta is, if payloads are disabled for the term's field, the
* difference between the position of the current occurrence in the document and
* the previous occurrence (or zero, if this is the first occurrence in this
* document). If payloads are enabled for the term's field, then PositionDelta/2
* is the difference between the current and the previous position. If payloads
* are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
* the length of the payload at the current term position.</li>
* <li>PayloadData is metadata associated with a term position. If
* PayloadLength is stored at the current position, then it indicates the length
* of this payload. If PayloadLength is not stored, then this payload has the same
* length as the payload at the previous position. PayloadData encodes the
* concatenated bytes for all of a term's occurrences.</li>
* <li>Offsets are stored as delta encoded VInts. The first VInt is the
* startOffset, the second is the endOffset.</li>
* </ul>
* </li>
* </ol>
* @deprecated only for reading 4.0 and 4.1 segments
*/
@Deprecated
public class Lucene40TermVectorsFormat extends TermVectorsFormat {
/** Sole constructor. */
@@ -120,7 +39,7 @@ public class Lucene40TermVectorsFormat extends TermVectorsFormat {
}
@Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
public final TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
return new Lucene40TermVectorsReader(directory, segmentInfo, fieldInfos, context);
}
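The shared-prefix term encoding noted above decodes by simple concatenation; a minimal sketch (hypothetical helper, operating on strings rather than raw bytes):

// Sketch only: rebuild a term from its shared prefix and stored suffix.
class PrefixSharedTermSketch {
  static String decodeTerm(String previousTerm, int prefixLength, String suffix) {
    // the first prefixLength chars are reused from the previous term
    return previousTerm.substring(0, prefixLength) + suffix;
  }
}

decodeTerm("bone", 2, "y") returns "boy", matching the example in the removed javadoc.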

View File: Lucene40TermVectorsReader.java

@@ -48,12 +48,10 @@ import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.0 Term Vectors reader.
* <p>
* It reads .tvd, .tvf, and .tvx files.
*
* @see Lucene40TermVectorsFormat
* @deprecated only for reading 4.0 and 4.1 segments
*/
public class Lucene40TermVectorsReader extends TermVectorsReader implements Closeable {
@Deprecated
final class Lucene40TermVectorsReader extends TermVectorsReader implements Closeable {
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;

View File: Lucene41Codec.java

@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene41;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -36,14 +35,8 @@ import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
* Implements the Lucene 4.1 index format, with configurable per-field postings formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene41 package documentation for file format details.
* @deprecated Only for reading old 4.0 segments
* @lucene.experimental
* Implements the Lucene 4.1 index format
* @deprecated Only for reading old 4.1 segments
*/
@Deprecated
public class Lucene41Codec extends Codec {
@@ -65,7 +58,6 @@ public class Lucene41Codec extends Codec {
super("Lucene41");
}
// TODO: slightly evil
@Override
public StoredFieldsFormat storedFieldsFormat() {
return fieldsFormat;

View File: Lucene41StoredFieldsFormat.java

@@ -30,6 +30,7 @@ import org.apache.lucene.store.IOContext;
/**
* Lucene 4.1 stored fields format.
* @deprecated only for reading old 4.x segments
*/
@Deprecated
public class Lucene41StoredFieldsFormat extends StoredFieldsFormat {

View File

@@ -34,7 +34,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
/**
* Random-access reader for {@code Lucene41CompressingStoredFieldsIndexWriter}.
* Reader for 4.x stored fields/term vectors index
* @deprecated only for reading old segments
*/
@Deprecated

View File: Lucene410Codec.java

@@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
@@ -41,18 +40,10 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentWriteState;
/**
* Implements the Lucene 4.10 index format, with configurable per-field postings
* and docvalues formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene410 package documentation for file format details.
* @lucene.experimental
* Implements the Lucene 4.10 codec
* @deprecated only for reading old 4.10 segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene411Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
@Deprecated
public class Lucene410Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat();

View File: Lucene42Codec.java

@@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
@@ -38,19 +37,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentWriteState;
/**
* Implements the Lucene 4.2 index format, with configurable per-field postings
* and docvalues formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene42 package documentation for file format details.
* @lucene.experimental
* Implements the Lucene 4.2 index format
* @deprecated Only for reading old 4.2 segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene43Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
@Deprecated
public class Lucene42Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();

View File: Lucene42DocValuesFormat.java

@@ -19,119 +19,22 @@ package org.apache.lucene.codecs.lucene42;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.BlockPackedWriter;
/**
* Lucene 4.2 DocValues format.
* <p>
* Encodes the four per-document value types (Numeric,Binary,Sorted,SortedSet) with eight basic strategies.
* <p>
* <ul>
* <li>Delta-compressed Numerics: per-document integers written in blocks of 4096. For each block
* the minimum value is encoded, and each entry is a delta from that minimum value.
* <li>Table-compressed Numerics: when the number of unique values is very small, a lookup table
* is written instead. Each per-document entry is then the ordinal to this table.
* <li>Uncompressed Numerics: when all values would fit into a single byte, and the
* <code>acceptableOverheadRatio</code> would pack values into 8 bits per value anyway, they
* are written as absolute values (with no indirection or packing) for performance.
* <li>GCD-compressed Numerics: when all numbers share a common divisor, such as dates, the greatest
* common divisor (GCD) is computed, and quotients are stored using Delta-compressed Numerics.
* <li>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length.
* Each document's value can be addressed by docID*length.
* <li>Variable-width Binary: one large concatenated byte[] is written, along with end addresses
* for each document. The addresses are written in blocks of 4096, with the current absolute
* start for the block, and the average (expected) delta per entry. For each document the
* deviation from the delta (actual - expected) is written.
* <li>Sorted: an FST mapping deduplicated terms to ordinals is written, along with the per-document
* ordinals written using one of the numeric strategies above.
* <li>SortedSet: an FST mapping deduplicated terms to ordinals is written, along with the per-document
* ordinal list written using one of the binary strategies above.
* </ul>
* <p>
* Files:
* <ol>
* <li><tt>.dvd</tt>: DocValues data</li>
* <li><tt>.dvm</tt>: DocValues metadata</li>
* </ol>
* <ol>
* <li><a name="dvm" id="dvm"></a>
* <p>The DocValues metadata or .dvm file.</p>
* <p>For DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;FieldNumber,EntryType,Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry</li>
* <li>NumericEntry --&gt; DataOffset,CompressionType,PackedVersion</li>
* <li>BinaryEntry --&gt; DataOffset,DataLength,MinLength,MaxLength,PackedVersion?,BlockSize?</li>
* <li>SortedEntry --&gt; DataOffset,ValueCount</li>
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>DataOffset,DataLength --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>EntryType,CompressionType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
* <p>SortedSet fields have two entries: a SortedEntry with the FST metadata,
* and an ordinary BinaryEntry for the document-to-ord-list metadata.</p>
* <p>FieldNumber of -1 indicates the end of metadata.</p>
* <p>EntryType is a 0 (NumericEntry), 1 (BinaryEntry, or 2 (SortedEntry)</p>
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
* <p>CompressionType indicates how Numeric values will be compressed:
* <ul>
* <li>0 --&gt; delta-compressed. For each block of 4096 integers, every integer is delta-encoded
* from the minimum value within the block.
* <li>1 --&gt; table-compressed. When the number of unique numeric values is small and it would save space,
* a lookup table of unique values is written, followed by the ordinal for each document.
* <li>2 --&gt; uncompressed. When the <code>acceptableOverheadRatio</code> parameter would upgrade the number
* of bits required to 8, and all values fit in a byte, these are written as absolute binary values
* for performance.
* <li>3 --&gt, gcd-compressed. When all integers share a common divisor, only quotients are stored
* using blocks of delta-encoded ints.
* </ul>
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
* is written for the addresses.
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
* <li>SortedData --&gt; {@link FST FST&lt;Int64&gt;}</li>
* <li>DeltaCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=4096)}</li>
* <li>TableCompressedNumerics --&gt; TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li>
* <li>UncompressedNumerics --&gt; {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
* sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
* </ol>
* <p>
* Limitations:
* <ul>
* <li> Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length.
* </ul>
* @deprecated Only for reading old 4.2 segments
*/
@Deprecated
public class Lucene42DocValuesFormat extends DocValuesFormat {
/** Maximum length for each binary doc values field. */
public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
final float acceptableOverheadRatio;
@ -162,7 +65,7 @@ public class Lucene42DocValuesFormat extends DocValuesFormat {
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
public final DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
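To make the gcd-compressed strategy above concrete, here is a minimal standalone sketch of the idea (class and variable names are illustrative; this is not the actual Lucene42 writer, which also bitpacks the quotients):

import java.math.BigInteger;

// Sketch: when all values share a common divisor (e.g. day-rounded dates),
// store the minimum, the gcd, and a small per-document quotient.
public class GcdCompressionSketch {
  public static void main(String[] args) {
    long[] values = {86400000L, 172800000L, 259200000L}; // timestamps at day granularity
    long min = Long.MAX_VALUE;
    for (long v : values) {
      min = Math.min(min, v);
    }
    long gcd = 0;
    for (long v : values) {
      gcd = BigInteger.valueOf(gcd).gcd(BigInteger.valueOf(v - min)).longValue();
    }
    for (long v : values) {
      long quotient = gcd == 0 ? 0 : (v - min) / gcd;
      // the decode side recomputes: v = min + quotient * gcd
      System.out.println(v + " -> quotient " + quotient);
    }
  }
}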

View File

@ -66,9 +66,11 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
/**
* Reader for {@link Lucene42DocValuesFormat}
* Reader for 4.2 docvalues
* @deprecated only for reading old 4.x segments
*/
class Lucene42DocValuesProducer extends DocValuesProducer {
@Deprecated
final class Lucene42DocValuesProducer extends DocValuesProducer {
// metadata maps (just file pointers and minimal stuff)
private final Map<String,NumericEntry> numerics;
private final Map<String,BinaryEntry> binaries;

View File

@ -19,70 +19,12 @@ package org.apache.lucene.codecs.lucene42;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.index.FieldInfo.DocValuesType; // javadoc
import org.apache.lucene.store.DataOutput; // javadoc
/**
* Lucene 4.2 Field Infos format.
* <p>
* <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
* <p>FieldInfos (.fnm) --&gt; Header,FieldsCount, &lt;FieldName,FieldNumber,
* FieldBits,DocValuesBits,Attributes&gt; <sup>FieldsCount</sup></p>
* <p>Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#checkHeader CodecHeader}</li>
* <li>FieldsCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldName --&gt; {@link DataOutput#writeString String}</li>
* <li>FieldBits, DocValuesBits --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt VInt}</li>
* <li>Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* </ul>
* </p>
* Field Descriptions:
* <ul>
* <li>FieldsCount: the number of fields in this file.</li>
* <li>FieldName: name of the field as a UTF-8 String.</li>
* <li>FieldNumber: the field's number. Note that unlike previous versions of
* Lucene, the fields are not numbered implicitly by their order in the
* file, instead explicitly.</li>
* <li>FieldBits: a byte containing field options.
* <ul>
* <li>The low-order bit is one for indexed fields, and zero for non-indexed
* fields.</li>
* <li>The second lowest-order bit is one for fields that have term vectors
* stored, and zero for fields without term vectors.</li>
* <li>If the third lowest order-bit is set (0x4), offsets are stored into
* the postings list in addition to positions.</li>
* <li>Fourth bit is unused.</li>
* <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the
* indexed field.</li>
* <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the
* indexed field.</li>
* <li>If the seventh lowest-order bit is set (0x40), term frequencies and
* positions are omitted for the indexed field.</li>
* <li>If the eighth lowest-order bit is set (0x80), positions are omitted for the
* indexed field.</li>
* </ul>
* </li>
* <li>DocValuesBits: a byte containing per-document value types. The type
* is recorded as two four-bit integers, with the high-order bits representing
* <code>norms</code> options, and the low-order bits representing
* {@code DocValues} options. Each four-bit integer can be decoded as such:
* <ul>
* <li>0: no DocValues for this field.</li>
* <li>1: NumericDocValues. ({@link DocValuesType#NUMERIC})</li>
* <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li>
* <li>3: SortedDocValues. ({@code DocValuesType#SORTED})</li>
* </ul>
* </li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* </ul>
*
* @lucene.experimental
* @deprecated Only for reading old 4.2-4.5 segments
*/
@Deprecated
@ -94,7 +36,7 @@ public class Lucene42FieldInfosFormat extends FieldInfosFormat {
}
@Override
public FieldInfosReader getFieldInfosReader() throws IOException {
public final FieldInfosReader getFieldInfosReader() throws IOException {
return reader;
}
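The FieldBits flags listed above can be decoded with plain bit masks. A hedged sketch (mask names follow the description, not necessarily the constants in the actual reader):

// Decoding the 4.2 FieldBits byte described above; masks mirror the list.
public class FieldBitsSketch {
  static final int IS_INDEXED = 0x01;
  static final int STORE_TERMVECTOR = 0x02;
  static final int STORE_OFFSETS = 0x04;  // offsets stored in postings
  static final int OMIT_NORMS = 0x10;
  static final int STORE_PAYLOADS = 0x20;
  static final int OMIT_TF_AND_POSITIONS = 0x40;
  static final int OMIT_POSITIONS = 0x80;

  public static void main(String[] args) {
    int bits = IS_INDEXED | OMIT_NORMS; // example byte read from .fnm
    System.out.println("indexed:       " + ((bits & IS_INDEXED) != 0));
    System.out.println("term vectors:  " + ((bits & STORE_TERMVECTOR) != 0));
    System.out.println("norms omitted: " + ((bits & OMIT_NORMS) != 0));
  }
}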

View File

@ -38,9 +38,7 @@ import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.2 FieldInfos reader.
*
* @lucene.experimental
* @deprecated Only for reading old 4.2-4.5 segments
* @see Lucene42FieldInfosFormat
*/
@Deprecated
final class Lucene42FieldInfosReader extends FieldInfosReader {

View File

@ -19,8 +19,6 @@ package org.apache.lucene.codecs.lucene42;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.NormsProducer;
@ -30,19 +28,9 @@ import org.apache.lucene.util.packed.PackedInts;
/**
* Lucene 4.2 score normalization format.
* <p>
* NOTE: this uses the same format as {@link Lucene42DocValuesFormat}
* Numeric DocValues, but with different file extensions, and passing
* {@link PackedInts#FASTEST} for uncompressed encoding: trading off
* space for performance.
* <p>
* Files:
* <ul>
* <li><tt>.nvd</tt>: DocValues data</li>
* <li><tt>.nvm</tt>: DocValues metadata</li>
* </ul>
* @see Lucene42DocValuesFormat
* @deprecated only for reading old 4.x segments
*/
@Deprecated
public class Lucene42NormsFormat extends NormsFormat {
final float acceptableOverheadRatio;
@ -73,7 +61,7 @@ public class Lucene42NormsFormat extends NormsFormat {
}
@Override
public NormsProducer normsProducer(SegmentReadState state) throws IOException {
public final NormsProducer normsProducer(SegmentReadState state) throws IOException {
return new Lucene42NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
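The space-for-performance trade-off mentioned above can be sketched in plain Java (this only illustrates the idea of an acceptable-overhead budget; it is not the PackedInts API):

// With a FASTEST-style overhead ratio, the writer may round the exact bit
// width up to a whole byte so values decode without bit shifting.
public class OverheadRatioSketch {
  public static void main(String[] args) {
    int bitsRequired = 5;     // exact bits needed for the largest norm value
    float overheadRatio = 7f; // FASTEST-style: up to 7 wasted bits per needed bit
    float budgetBits = bitsRequired + overheadRatio * bitsRequired;
    int bitsPerValue = budgetBits >= 8 ? 8 : bitsRequired; // pad to a byte if allowed
    System.out.println("encode norms at " + bitsPerValue + " bits per value");
  }
}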

View File

@ -18,7 +18,6 @@ package org.apache.lucene.codecs.lucene42;
*/
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.FieldInfo;
@ -28,11 +27,10 @@ import org.apache.lucene.util.Accountable;
/**
* Reads 4.2-4.8 norms.
* Implemented the same as docvalues, but with a different filename.
* @deprecated Only for reading old segments
*/
@Deprecated
class Lucene42NormsProducer extends NormsProducer {
final class Lucene42NormsProducer extends NormsProducer {
private final Lucene42DocValuesProducer impl;
Lucene42NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {

View File

@ -55,7 +55,7 @@ import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.PackedInts;
/**
* {@link TermVectorsReader} for {@code Lucene42TermVectorsFormat}.
* 4.2 term vectors reader
* @deprecated only for reading old segments
*/
@Deprecated

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
@ -41,19 +40,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentWriteState;
/**
* Implements the Lucene 4.5 index format, with configurable per-field postings
* and docvalues formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene45 package documentation for file format details.
* @lucene.experimental
* Implements the Lucene 4.5 index format
* @deprecated Only for reading old 4.3-4.5 segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene46Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
@Deprecated
public class Lucene45Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();

View File

@ -39,7 +39,11 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/** writer for {@link Lucene45DocValuesFormat} */
/**
* writer for 4.5 docvalues format
* @deprecated only for old 4.x segments
*/
@Deprecated
class Lucene45DocValuesConsumer extends DocValuesConsumer implements Closeable {
static final int BLOCK_SIZE = 16384;

View File

@ -19,148 +19,15 @@ package org.apache.lucene.codecs.lucene45;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.SmallFloat;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
* Lucene 4.5 DocValues format.
* <p>
* Encodes the four per-document value types (Numeric,Binary,Sorted,SortedSet) with these strategies:
* <p>
* {@link DocValuesType#NUMERIC NUMERIC}:
* <ul>
* <li>Delta-compressed: per-document integers written in blocks of 16k. For each block
* the minimum value in that block is encoded, and each entry is a delta from that
* minimum value. Each block of deltas is compressed with bitpacking. For more
* information, see {@link BlockPackedWriter}.
* <li>Table-compressed: when the number of unique values is very small (&lt; 256), and
* when there are unused "gaps" in the range of values used (such as {@link SmallFloat}),
* a lookup table is written instead. Each per-document entry is then the ordinal
* to this table, and those ordinals are compressed with bitpacking ({@link PackedInts}).
* <li>GCD-compressed: when all numbers share a common divisor, such as dates, the greatest
* common divisor (GCD) is computed, and quotients are stored using Delta-compressed Numerics.
* </ul>
* <p>
* {@link DocValuesType#BINARY BINARY}:
* <ul>
* <li>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length.
* Each document's value can be addressed directly with multiplication ({@code docID * length}).
* <li>Variable-width Binary: one large concatenated byte[] is written, along with end addresses
* for each document. The addresses are written in blocks of 16k, with the current absolute
* start for the block, and the average (expected) delta per entry. For each document the
* deviation from the delta (actual - expected) is written.
* <li>Prefix-compressed Binary: values are written in chunks of 16, with the first value written
* completely and other values sharing prefixes. Chunk addresses are written in blocks of 16k,
* with the current absolute start for the block, and the average (expected) delta per entry.
* For each chunk the deviation from the delta (actual - expected) is written.
* </ul>
* <p>
* {@link DocValuesType#SORTED SORTED}:
* <ul>
* <li>Sorted: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary,
* along with the per-document ordinals written using one of the numeric strategies above.
* </ul>
* <p>
* {@link DocValuesType#SORTED_SET SORTED_SET}:
* <ul>
* <li>SortedSet: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary,
* an ordinal list and per-document index into this list are written using the numeric strategies
* above.
* </ul>
* <p>
* Files:
* <ol>
* <li><tt>.dvd</tt>: DocValues data</li>
* <li><tt>.dvm</tt>: DocValues metadata</li>
* </ol>
* <ol>
* <li><a name="dvm" id="dvm"></a>
* <p>The DocValues metadata or .dvm file.</p>
* <p>For DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li>
* <li>NumericEntry --&gt; GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
* <li>GCDNumericEntry --&gt; NumericHeader,MinValue,GCD</li>
* <li>TableNumericEntry --&gt; NumericHeader,TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup></li>
* <li>DeltaNumericEntry --&gt; NumericHeader</li>
* <li>NumericHeader --&gt; FieldNumber,EntryType,NumericType,MissingOffset,PackedVersion,DataOffset,Count,BlockSize</li>
* <li>BinaryEntry --&gt; FixedBinaryEntry | VariableBinaryEntry | PrefixBinaryEntry</li>
* <li>FixedBinaryEntry --&gt; BinaryHeader</li>
* <li>VariableBinaryEntry --&gt; BinaryHeader,AddressOffset,PackedVersion,BlockSize</li>
* <li>PrefixBinaryEntry --&gt; BinaryHeader,AddressInterval,AddressOffset,PackedVersion,BlockSize</li>
* <li>BinaryHeader --&gt; FieldNumber,EntryType,BinaryType,MissingOffset,MinLength,MaxLength,DataOffset</li>
* <li>SortedEntry --&gt; FieldNumber,EntryType,BinaryEntry,NumericEntry</li>
* <li>SortedSetEntry --&gt; EntryType,BinaryEntry,NumericEntry,NumericEntry</li>
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>EntryType,CompressionType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>TableSize --&gt; {@link DataOutput#writeVInt vInt}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
* <p>SortedSet fields have three entries: a BinaryEntry with the value metadata,
* and two NumericEntries for the document-to-ord-index and ordinal list metadata.</p>
* <p>FieldNumber of -1 indicates the end of metadata.</p>
* <p>EntryType is a 0 (NumericEntry) or 1 (BinaryEntry)</p>
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
* <p>NumericType indicates how Numeric values will be compressed:
* <ul>
* <li>0 --&gt; delta-compressed. For each block of 16k integers, every integer is delta-encoded
* from the minimum value within the block.
* <li>1 --&gt, gcd-compressed. When all integers share a common divisor, only quotients are stored
* using blocks of delta-encoded ints.
* <li>2 --&gt; table-compressed. When the number of unique numeric values is small and it would save space,
* a lookup table of unique values is written, followed by the ordinal for each document.
* </ul>
* <p>BinaryType indicates how Binary values will be stored:
* <ul>
* <li>0 --&gt; fixed-width. All values have the same length, addressing by multiplication.
* <li>1 --&gt, variable-width. An address for each value is stored.
* <li>2 --&gt; prefix-compressed. An address to the start of every interval'th value is stored.
* </ul>
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
* is written for the addresses.
* <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
* If it's -1, then there are no missing values.
* <p>Checksum contains the CRC32 checksum of all bytes in the .dvm file up
* until the checksum. This is used to verify integrity of the file on opening the
* index.
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
* <li>SortedData --&gt; {@link FST FST&lt;Int64&gt;}</li>
* <li>DeltaCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
* <li>TableCompressedNumerics --&gt; {@link PackedInts PackedInts}</li>
* <li>GCDCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
* sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
* </ol>
* @deprecated Only for reading old 4.3-4.5 segments
* @lucene.experimental
*/
@Deprecated
public class Lucene45DocValuesFormat extends DocValuesFormat {
@ -177,7 +44,7 @@ public class Lucene45DocValuesFormat extends DocValuesFormat {
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
public final DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new Lucene45DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}
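The prefix-compressed binary strategy above amounts to front-coding of sorted terms. A minimal sketch (illustrative only; the actual 4.5 writer also groups chunks of 16 and packs the addresses):

import java.nio.charset.StandardCharsets;

// Front-coding: within a chunk the first term is written fully; later terms
// store only the shared-prefix length plus their suffix bytes.
public class PrefixCompressionSketch {
  public static void main(String[] args) {
    String[] sortedTerms = {"apple", "applesauce", "apply"};
    byte[] previous = new byte[0];
    for (String term : sortedTerms) {
      byte[] current = term.getBytes(StandardCharsets.UTF_8);
      int prefix = 0;
      int max = Math.min(previous.length, current.length);
      while (prefix < max && previous[prefix] == current[prefix]) {
        prefix++;
      }
      String suffix = new String(current, prefix, current.length - prefix, StandardCharsets.UTF_8);
      System.out.println(term + " -> prefix=" + prefix + " suffix=" + suffix);
      previous = current;
    }
  }
}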

View File

@ -63,13 +63,16 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
/** reader for {@link Lucene45DocValuesFormat} */
/**
* reader for 4.5 docvalues format
* @deprecated only for reading old 4.x segments
*/
@Deprecated
class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<Integer,NumericEntry> numerics;
private final Map<Integer,BinaryEntry> binaries;

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
@ -39,19 +38,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentWriteState;
/**
* Implements the Lucene 4.6 index format, with configurable per-field postings
* and docvalues formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene46 package documentation for file format details.
* @lucene.experimental
* Implements the Lucene 4.6 index format
* @deprecated Only for reading old 4.6-4.8 segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene46Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
@Deprecated
public class Lucene46Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();

View File

@ -19,77 +19,15 @@ package org.apache.lucene.codecs.lucene46;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.store.DataOutput;
/**
* Lucene 4.6 Field Infos format.
* <p>
* <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
* <p>FieldInfos (.fnm) --&gt; Header,FieldsCount, &lt;FieldName,FieldNumber,
* FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup>,Footer</p>
* <p>Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#checkHeader CodecHeader}</li>
* <li>FieldsCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldName --&gt; {@link DataOutput#writeString String}</li>
* <li>FieldBits, DocValuesBits --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt VInt}</li>
* <li>Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>DocValuesGen --&gt; {@link DataOutput#writeLong(long) Int64}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
* <ul>
* <li>FieldsCount: the number of fields in this file.</li>
* <li>FieldName: name of the field as a UTF-8 String.</li>
* <li>FieldNumber: the field's number. Note that unlike previous versions of
* Lucene, the fields are not numbered implicitly by their order in the
* file, instead explicitly.</li>
* <li>FieldBits: a byte containing field options.
* <ul>
* <li>The low-order bit is one for indexed fields, and zero for non-indexed
* fields.</li>
* <li>The second lowest-order bit is one for fields that have term vectors
* stored, and zero for fields without term vectors.</li>
* <li>If the third lowest order-bit is set (0x4), offsets are stored into
* the postings list in addition to positions.</li>
* <li>Fourth bit is unused.</li>
* <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the
* indexed field.</li>
* <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the
* indexed field.</li>
* <li>If the seventh lowest-order bit is set (0x40), term frequencies and
* positions are omitted for the indexed field.</li>
* <li>If the eighth lowest-order bit is set (0x80), positions are omitted for the
* indexed field.</li>
* </ul>
* </li>
* <li>DocValuesBits: a byte containing per-document value types. The type
* is recorded as two four-bit integers, with the high-order bits representing
* <code>norms</code> options, and the low-order bits representing
* {@code DocValues} options. Each four-bit integer can be decoded as such:
* <ul>
* <li>0: no DocValues for this field.</li>
* <li>1: NumericDocValues. ({@link DocValuesType#NUMERIC})</li>
* <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li>
* <li>3: SortedDocValues. ({@code DocValuesType#SORTED})</li>
* </ul>
* </li>
* <li>DocValuesGen is the generation count of the field's DocValues. If this is -1,
* there are no DocValues updates to that field. Anything above zero means there
* are updates stored by {@link DocValuesFormat}.</li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* </ul>
*
* @lucene.experimental
* @deprecated only for old 4.x segments
*/
@Deprecated
public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
private final FieldInfosReader reader = new Lucene46FieldInfosReader();
private final FieldInfosWriter writer = new Lucene46FieldInfosWriter();
@ -99,7 +37,7 @@ public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
}
@Override
public FieldInfosReader getFieldInfosReader() throws IOException {
public final FieldInfosReader getFieldInfosReader() throws IOException {
return reader;
}
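The DocValuesBits byte above packs two four-bit integers, so decoding is a nibble split. A small sketch under those assumptions (type names taken from the list above):

// High nibble = norms type, low nibble = docvalues type, per the description.
public class DocValuesBitsSketch {
  static String typeName(int nibble) {
    switch (nibble) {
      case 0: return "NONE";
      case 1: return "NUMERIC";
      case 2: return "BINARY";
      case 3: return "SORTED";
      default: return "OTHER(" + nibble + ")";
    }
  }

  public static void main(String[] args) {
    int docValuesBits = 0x12; // example: NUMERIC norms, BINARY docvalues
    System.out.println("norms:     " + typeName((docValuesBits >>> 4) & 0xF));
    System.out.println("docvalues: " + typeName(docValuesBits & 0xF));
  }
}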

View File

@ -38,9 +38,9 @@ import org.apache.lucene.store.IndexInput;
/**
* Lucene 4.6 FieldInfos reader.
*
* @lucene.experimental
* @see Lucene46FieldInfosFormat
* @deprecated only for old 4.x segments
*/
@Deprecated
final class Lucene46FieldInfosReader extends FieldInfosReader {
/** Sole constructor. */

View File

@ -34,9 +34,9 @@ import org.apache.lucene.store.IOContext;
/**
* Lucene 4.6 FieldInfos writer.
*
* @see Lucene46FieldInfosFormat
* @lucene.experimental
* @deprecated only for old 4.x segments
*/
@Deprecated
final class Lucene46FieldInfosWriter extends FieldInfosWriter {
/** Sole constructor. */

View File

@ -17,54 +17,16 @@ package org.apache.lucene.codecs.lucene46;
* limitations under the License.
*/
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.SegmentInfo; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.index.SegmentInfo;
/**
* Lucene 4.6 Segment info format.
* <p>
* Files:
* <ul>
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
* </ul>
* </p>
* Data types:
* <p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>SegSize --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>SegVersion --&gt; {@link DataOutput#writeString String}</li>
* <li>Files --&gt; {@link DataOutput#writeStringSet Set&lt;String&gt;}</li>
* <li>Diagnostics --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
* <p>
* <ul>
* <li>SegVersion is the code version that created the segment.</li>
* <li>SegSize is the number of documents contained in the segment index.</li>
* <li>IsCompoundFile records whether the segment is written as a compound file or
* not. If this is -1, the segment is not a compound file. If it is 1, the segment
* is a compound file.</li>
* <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
* for each segment it creates. It includes metadata like the current Lucene
* version, OS, Java version, why the segment was created (merge, flush,
* addIndexes), etc.</li>
* <li>Files is a list of files referred to by this segment.</li>
* </ul>
* </p>
*
* @see SegmentInfos
* @lucene.experimental
* @deprecated only for old 4.x segments
*/
@Deprecated
public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
private final SegmentInfoReader reader = new Lucene46SegmentInfoReader();
@ -73,7 +35,7 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
}
@Override
public SegmentInfoReader getSegmentInfoReader() {
public final SegmentInfoReader getSegmentInfoReader() {
return reader;
}
@ -83,7 +45,7 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
}
/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene46SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CHECKSUM = 1;
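For orientation, the .si layout described above serializes roughly as a header, a few scalar fields, and a footer. A standalone approximation with java.io (the real format uses CodecUtil headers and Lucene's VInt/String encodings, so the byte layout here is only indicative):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class SegmentInfoLayoutSketch {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(bytes)) {
      out.writeUTF("Lucene46SegmentInfo"); // Header: codec name
      out.writeInt(0);                     // Header: format version
      out.writeUTF("4.6.0");               // SegVersion: code version that wrote the segment
      out.writeInt(1000);                  // SegSize: number of documents
      out.writeByte(1);                    // IsCompoundFile: 1 = compound, -1 = not
      // Diagnostics map, the file set, and a checksum footer would follow.
    }
    System.out.println(".si sketch occupies " + bytes.size() + " bytes");
  }
}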

View File

@ -33,12 +33,11 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Version;
/**
* Lucene 4.6 implementation of {@link SegmentInfoReader}.
*
* @see Lucene46SegmentInfoFormat
* @lucene.experimental
* Lucene 4.6 segment infos reader
* @deprecated only for old 4.x segments
*/
public class Lucene46SegmentInfoReader extends SegmentInfoReader {
@Deprecated
final class Lucene46SegmentInfoReader extends SegmentInfoReader {
/** Sole constructor. */
public Lucene46SegmentInfoReader() {

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
@ -40,18 +39,10 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentWriteState;
/**
* Implements the Lucene 4.9 index format, with configurable per-field postings
* and docvalues formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene49 package documentation for file format details.
* @lucene.experimental
* Implements the Lucene 4.9 index format
* @deprecated only for old 4.x segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene410Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
@Deprecated
public class Lucene49Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat();

View File

@ -40,7 +40,11 @@ import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/** writer for {@link Lucene49DocValuesFormat} */
/**
* writer for 4.9 docvalues format
* @deprecated only for old 4.x segments
*/
@Deprecated
class Lucene49DocValuesConsumer extends DocValuesConsumer implements Closeable {
static final int BLOCK_SIZE = 16384;

View File

@ -19,151 +19,17 @@ package org.apache.lucene.codecs.lucene49;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.SmallFloat;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
/**
* Lucene 4.9 DocValues format.
* <p>
* Encodes the five per-document value types (Numeric,Binary,Sorted,SortedSet,SortedNumeric) with these strategies:
* <p>
* {@link DocValuesType#NUMERIC NUMERIC}:
* <ul>
* <li>Delta-compressed: per-document integers written as deltas from the minimum value,
* compressed with bitpacking. For more information, see {@link DirectWriter}.
* <li>Table-compressed: when the number of unique values is very small (&lt; 256), and
* when there are unused "gaps" in the range of values used (such as {@link SmallFloat}),
* a lookup table is written instead. Each per-document entry is then the ordinal
* to this table, and those ordinals are compressed with bitpacking ({@link DirectWriter}).
* <li>GCD-compressed: when all numbers share a common divisor, such as dates, the greatest
* common divisor (GCD) is computed, and quotients are stored using Delta-compressed Numerics.
* <li>Monotonic-compressed: when all numbers are monotonically increasing offsets, they are written
* as blocks of bitpacked integers, encoding the deviation from the expected delta.
* </ul>
* <p>
* {@link DocValuesType#BINARY BINARY}:
* <ul>
* <li>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length.
* Each document's value can be addressed directly with multiplication ({@code docID * length}).
* <li>Variable-width Binary: one large concatenated byte[] is written, along with end addresses
* for each document. The addresses are written as Monotonic-compressed numerics.
* <li>Prefix-compressed Binary: values are written in chunks of 16, with the first value written
* completely and other values sharing prefixes. Chunk addresses are written as Monotonic-compressed
* numerics.
* </ul>
* <p>
* {@link DocValuesType#SORTED SORTED}:
* <ul>
* <li>Sorted: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary,
* along with the per-document ordinals written using one of the numeric strategies above.
* </ul>
* <p>
* {@link DocValuesType#SORTED_SET SORTED_SET}:
* <ul>
* <li>SortedSet: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary,
* an ordinal list and per-document index into this list are written using the numeric strategies
* above.
* </ul>
* <p>
* {@link DocValuesType#SORTED_NUMERIC SORTED_NUMERIC}:
* <ul>
* <li>SortedNumeric: a value list and per-document index into this list are written using the numeric
* strategies above.
* </ul>
* <p>
* Files:
* <ol>
* <li><tt>.dvd</tt>: DocValues data</li>
* <li><tt>.dvm</tt>: DocValues metadata</li>
* </ol>
* <ol>
* <li><a name="dvm" id="dvm"></a>
* <p>The DocValues metadata or .dvm file.</p>
* <p>For DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry | SortedNumericEntry</li>
* <li>NumericEntry --&gt; GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
* <li>GCDNumericEntry --&gt; NumericHeader,MinValue,GCD,BitsPerValue</li>
* <li>TableNumericEntry --&gt; NumericHeader,TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,BitsPerValue</li>
* <li>DeltaNumericEntry --&gt; NumericHeader,MinValue,BitsPerValue</li>
* <li>MonotonicNumericEntry --&gt; NumericHeader,PackedVersion,BlockSize</li>
* <li>NumericHeader --&gt; FieldNumber,EntryType,NumericType,MissingOffset,DataOffset,Count,EndOffset</li>
* <li>BinaryEntry --&gt; FixedBinaryEntry | VariableBinaryEntry | PrefixBinaryEntry</li>
* <li>FixedBinaryEntry --&gt; BinaryHeader</li>
* <li>VariableBinaryEntry --&gt; BinaryHeader,AddressOffset,PackedVersion,BlockSize</li>
* <li>PrefixBinaryEntry --&gt; BinaryHeader,AddressInterval,AddressOffset,PackedVersion,BlockSize</li>
* <li>BinaryHeader --&gt; FieldNumber,EntryType,BinaryType,MissingOffset,MinLength,MaxLength,DataOffset</li>
* <li>SortedEntry --&gt; FieldNumber,EntryType,BinaryEntry,NumericEntry</li>
* <li>SortedSetEntry --&gt; EntryType,BinaryEntry,NumericEntry,NumericEntry</li>
* <li>SortedNumericEntry --&gt; EntryType,NumericEntry,NumericEntry</li>
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>EntryType,CompressionType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset,EndOffset --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>TableSize,BitsPerValue --&gt; {@link DataOutput#writeVInt vInt}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
* <p>SortedSet fields have three entries: a BinaryEntry with the value metadata,
* and two NumericEntries for the document-to-ord-index and ordinal list metadata.</p>
* <p>SortedNumeric fields have two entries: A NumericEntry with the value metadata,
* and a numeric entry with the document-to-value index.</p>
* <p>FieldNumber of -1 indicates the end of metadata.</p>
* <p>EntryType is a 0 (NumericEntry) or 1 (BinaryEntry)</p>
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
* <p>EndOffset is the pointer to the end of the data in the DocValues data (.dvd)</p>
* <p>NumericType indicates how Numeric values will be compressed:
* <ul>
* <li>0 --&gt; delta-compressed. For each block of 16k integers, every integer is delta-encoded
* from the minimum value within the block.
* <li>1 --&gt, gcd-compressed. When all integers share a common divisor, only quotients are stored
* using blocks of delta-encoded ints.
* <li>2 --&gt; table-compressed. When the number of unique numeric values is small and it would save space,
* a lookup table of unique values is written, followed by the ordinal for each document.
* </ul>
* <p>BinaryType indicates how Binary values will be stored:
* <ul>
* <li>0 --&gt; fixed-width. All values have the same length, addressing by multiplication.
* <li>1 --&gt, variable-width. An address for each value is stored.
* <li>2 --&gt; prefix-compressed. An address to the start of every interval'th value is stored.
* </ul>
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
* is written for the addresses.
* <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
* If it's -1, then there are no missing values.
* <p>Checksum contains the CRC32 checksum of all bytes in the .dvm file up
* until the checksum. This is used to verify integrity of the file on opening the
* index.
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
* <li>SortedData --&gt; {@link FST FST&lt;Int64&gt;}</li>
* <li>DeltaCompressedNumerics,TableCompressedNumerics,GCDCompressedNumerics --&gt; {@link DirectWriter PackedInts}</li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </ol>
* @lucene.experimental
* @deprecated only for old 4.x segments
*/
@Deprecated
public class Lucene49DocValuesFormat extends DocValuesFormat {
/** Sole Constructor */
@ -177,7 +43,7 @@ public class Lucene49DocValuesFormat extends DocValuesFormat {
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
public final DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new Lucene49DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}
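The monotonic-compressed strategy above stores each offset as its deviation from a straight line through the block, which keeps the bitpacked numbers small. A tiny sketch of the arithmetic (illustrative only):

// Monotonically increasing offsets -> store small signed deviations from
// min + i * averageDelta instead of the raw values.
public class MonotonicCompressionSketch {
  public static void main(String[] args) {
    long[] offsets = {0, 98, 205, 300, 401};
    int n = offsets.length - 1;
    float avgDelta = (offsets[n] - offsets[0]) / (float) n;
    for (int i = 0; i <= n; i++) {
      long expected = offsets[0] + (long) (avgDelta * i);
      long deviation = offsets[i] - expected; // small value to bitpack
      System.out.println(offsets[i] + " -> deviation " + deviation);
    }
  }
}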

View File

@ -67,7 +67,11 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.DirectReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
/** reader for {@link Lucene49DocValuesFormat} */
/**
* reader for 4.9 docvalues format
* @deprecated only for 4.x segments
*/
@Deprecated
class Lucene49DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<String,NumericEntry> numerics;
private final Map<String,BinaryEntry> binaries;

View File

@ -26,7 +26,6 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.MissingOrdRemapper;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@ -37,7 +36,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
class Lucene40DocValuesWriter extends DocValuesConsumer {
/**
* Writer for 4.0 docvalues format
* @deprecated for test purposes only
*/
@Deprecated
final class Lucene40DocValuesWriter extends DocValuesConsumer {
private final Directory dir;
private final SegmentWriteState state;
private final String legacyKey;

View File

@ -33,13 +33,11 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.0 FieldInfos writer.
*
* @see Lucene40FieldInfosFormat
* @lucene.experimental
* Writer for 4.0 fieldinfos format
* @deprecated for test purposes only
*/
@Deprecated
public class Lucene40FieldInfosWriter extends FieldInfosWriter {
public final class Lucene40FieldInfosWriter extends FieldInfosWriter {
/** Sole constructor. */
public Lucene40FieldInfosWriter() {

View File

@ -37,12 +37,11 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* Concrete class that writes the 4.0 frq/prx postings format.
*
* @see Lucene40PostingsFormat
* @lucene.experimental
* Writer for 4.0 postings format
* @deprecated for test purposes only
*/
public final class Lucene40PostingsWriter extends PushPostingsWriterBase {
@Deprecated
final class Lucene40PostingsWriter extends PushPostingsWriterBase {
final IndexOutput freqOut;
final IndexOutput proxOut;

View File

@ -28,8 +28,11 @@ import org.apache.lucene.codecs.TermVectorsFormat;
* limitations under the License.
*/
/** Read-write version of Lucene40Codec for testing */
@SuppressWarnings("deprecation")
/**
* Read-write version of 4.0 codec for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene40RWCodec extends Lucene40Codec {
private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {

View File

@ -23,9 +23,12 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
/** Read-write version of {@link Lucene40DocValuesFormat} for testing */
@SuppressWarnings("deprecation")
public class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {
/**
* Read-write version of 4.0 docvalues format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {

View File

@ -24,9 +24,12 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
/** Read-write version of {@link Lucene40NormsFormat} for testing */
@SuppressWarnings("deprecation")
public class Lucene40RWNormsFormat extends Lucene40NormsFormat {
/**
* Read-write version of 4.0 norms format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene40RWNormsFormat extends Lucene40NormsFormat {
@Override
public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {

View File

@ -23,14 +23,19 @@ import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.LuceneTestCase;
/**
* Read-write version of {@link Lucene40PostingsFormat} for testing.
* Read-write version of 4.0 postings format for testing
* @deprecated for test purposes only
*/
@SuppressWarnings("deprecation")
public class Lucene40RWPostingsFormat extends Lucene40PostingsFormat {
@Deprecated
public final class Lucene40RWPostingsFormat extends Lucene40PostingsFormat {
/** minimum items (terms or sub-blocks) per block for 4.0 BlockTree */
final static int MIN_BLOCK_SIZE = 25;
/** maximum items (terms or sub-blocks) per block for 4.0 BlockTree */
final static int MAX_BLOCK_SIZE = 48;
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docs = new Lucene40PostingsWriter(state);
@ -41,7 +46,7 @@ public class Lucene40RWPostingsFormat extends Lucene40PostingsFormat {
// Or... you must make a new Codec for this?
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
success = true;
return ret;
} finally {

View File

@ -19,8 +19,12 @@ package org.apache.lucene.codecs.lucene40;
import org.apache.lucene.codecs.SegmentInfoWriter;
/** read-write version of 4.0 segmentinfos for testing */
public class Lucene40RWSegmentInfoFormat extends Lucene40SegmentInfoFormat {
/**
* Read-write version of 4.0 segmentinfo format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene40RWSegmentInfoFormat extends Lucene40SegmentInfoFormat {
@Override
public SegmentInfoWriter getSegmentInfoWriter() {

View File

@ -24,10 +24,12 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Simulates writing Lucene 4.0 Stored Fields Format.
*/
public class Lucene40RWStoredFieldsFormat extends Lucene40StoredFieldsFormat {
/**
* Read-write version of 4.0 stored fields format for testing
* @deprecated for test purposes only
*/
@Deprecated
final class Lucene40RWStoredFieldsFormat extends Lucene40StoredFieldsFormat {
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {

View File

@ -23,12 +23,13 @@ import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.LuceneTestCase;
/**
* Simulates writing Lucene 4.0 Term Vectors Format.
*/
public class Lucene40RWTermVectorsFormat extends Lucene40TermVectorsFormat {
/**
* Read-write version of 4.0 term vectors format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene40RWTermVectorsFormat extends Lucene40TermVectorsFormat {
@Override
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {

View File

@ -31,13 +31,11 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.0 implementation of {@link SegmentInfoWriter}.
*
* @see Lucene40SegmentInfoFormat
* @lucene.experimental
* writer for 4.0 segmentinfos for testing
* @deprecated for test purposes only
*/
@Deprecated
public class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
public final class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
/** Sole constructor. */
public Lucene40SegmentInfoWriter() {

View File

@ -25,14 +25,11 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
/**
* Implements the skip list writer for the 4.0 posting list format
* that stores positions and payloads.
*
* @see Lucene40PostingsFormat
* @deprecated Only for reading old 4.0 segments
* Writer of 4.0 skip lists for testing
* @deprecated for test purposes only
*/
@Deprecated
public class Lucene40SkipListWriter extends MultiLevelSkipListWriter {
final class Lucene40SkipListWriter extends MultiLevelSkipListWriter {
private int[] lastSkipDoc;
private int[] lastSkipPayloadLength;
private int[] lastSkipOffsetLength;

View File

@ -34,15 +34,12 @@ import org.apache.lucene.util.IOUtils;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsReader.*;
/**
* Class responsible for writing stored document fields.
* <p/>
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
*
* @see Lucene40StoredFieldsFormat
* @lucene.experimental
* Writer for 4.0 stored fields format for testing
* @deprecated for test purposes only
*/
public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
@Deprecated
final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
private final Directory directory;
private final String segment;

View File

@ -36,24 +36,12 @@ import org.apache.lucene.util.StringHelper;
import static org.apache.lucene.codecs.lucene40.Lucene40TermVectorsReader.*;
// TODO: make a new 4.0 TV format that encodes better
// - use startOffset (not endOffset) as base for delta on
// next startOffset because today for syns or ngrams or
// WDF or shingles etc. we are encoding negative vints
// (= slow, 5 bytes per)
// - if doc has no term vectors, write 0 into the tvx
// file; saves a seek to tvd only to read a 0 vint (and
// saves a byte in tvd)
/**
* Lucene 4.0 Term Vectors writer.
* <p>
* It writes .tvd, .tvf, and .tvx files.
*
* @see Lucene40TermVectorsFormat
* Writer for 4.0 term vectors format for testing
* @deprecated for test purposes only
*/
public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
@Deprecated
final class Lucene40TermVectorsWriter extends TermVectorsWriter {
private final Directory directory;
private final String segment;
private IndexOutput tvx = null, tvd = null, tvf = null;
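The cost the TODO above refers to is concrete: a vInt stores 7 payload bits per byte with a continuation bit, so any negative delta occupies the full 5 bytes. A sketch mirroring the length behavior of writeVInt:

public class VIntCostSketch {
  // Mirrors the length behavior of DataOutput.writeVInt: 7 bits per byte,
  // high bit of each byte flags continuation.
  static int vIntLength(int value) {
    int length = 1;
    while ((value & ~0x7F) != 0) { // more than 7 significant bits remain
      value >>>= 7;
      length++;
    }
    return length;
  }

  public static void main(String[] args) {
    System.out.println("vInt(100) -> " + vIntLength(100) + " byte(s)"); // 1
    System.out.println("vInt(-3)  -> " + vIntLength(-3) + " byte(s)");  // 5
  }
}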

View File

@ -24,7 +24,6 @@ import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
/**
* <code>TestBitVector</code> tests the <code>BitVector</code>, obviously.

View File

@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene40;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
import org.junit.BeforeClass;
public class TestLucene40StoredFieldsFormat extends BaseStoredFieldsFormatTestCase {

View File

@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene40;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseTermVectorsFormatTestCase;
import org.junit.BeforeClass;
public class TestLucene40TermVectorsFormat extends BaseTermVectorsFormatTestCase {

View File

@ -34,10 +34,11 @@ import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
*/
/**
* Read-write version of {@link Lucene41Codec} for testing.
* Read-write version of 4.1 codec for testing
* @deprecated for test purposes only
*/
@SuppressWarnings("deprecation")
public class Lucene41RWCodec extends Lucene41Codec {
@Deprecated
public final class Lucene41RWCodec extends Lucene41Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41RWStoredFieldsFormat();
private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
@Override

View File

@ -24,8 +24,12 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/** read-write version of Lucene41StoredFieldsFormat for testing */
public class Lucene41RWStoredFieldsFormat extends Lucene41StoredFieldsFormat {
/**
* Read-write version of 4.1 stored fields format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene41RWStoredFieldsFormat extends Lucene41StoredFieldsFormat {
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
return new Lucene41StoredFieldsWriter(directory, si, SEGMENT_SUFFIX, context, FORMAT_NAME, COMPRESSION_MODE, CHUNK_SIZE);
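The same shape recurs through the formats below: the production class stays read-only, and a final, deprecated RW subclass overrides only the writer factory. A hedged usage sketch, assuming the Lucene test framework (MockAnalyzer, random() and newDirectory() come from LuceneTestCase, and the IndexWriterConfig constructor is the 5.x single-argument one; older branches also take a Version):

IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
conf.setCodec(new Lucene41RWCodec()); // wires the deprecated 4.1 writers back in
try (Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, conf)) {
  Document doc = new Document();
  doc.add(new StoredField("field", "value"));
  writer.addDocument(doc); // segment is written in the old 4.1 format
} // reading it back exercises the normal, read-only codec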

View File

@ -27,8 +27,10 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.PackedInts;
/**
* writer for lucene 4.x stored fields/vectors index for testing
* Writer for 4.1 stored fields/term vectors index for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene41StoredFieldsIndexWriter implements Closeable {
static final int BLOCK_SIZE = 1024; // number of chunks to serialize at once

View File

@ -53,8 +53,10 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
/**
* writer for Lucene 4.1 stored fields for testing
* Writer for 4.1 stored fields format for testing
* @deprecated for test purposes only
*/
@Deprecated
final class Lucene41StoredFieldsWriter extends StoredFieldsWriter {
// hard limit on the maximum number of documents per chunk

View File

@ -34,7 +34,6 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.fst.Builder;
@ -58,9 +57,11 @@ import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.TABLE_
import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.UNCOMPRESSED;
/**
* Writer for {@link Lucene42DocValuesFormat}
* Writer for 4.2 docvalues format for testing
* @deprecated for test purposes only
*/
class Lucene42DocValuesConsumer extends DocValuesConsumer {
@Deprecated
final class Lucene42DocValuesConsumer extends DocValuesConsumer {
final IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
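The acceptableOverheadRatio field is the knob this consumer passes to PackedInts when packing numeric values: a higher ratio lets the packer round bits-per-value up to a layout that decodes faster at the cost of wasted bits. A hedged illustration against the public PackedInts API (already imported above; the sizes are invented):

int valueCount = 128;   // number of values to store
long maxValue = 42;     // largest value observed
PackedInts.Mutable packed = PackedInts.getMutable(
    valueCount,
    PackedInts.bitsRequired(maxValue), // minimum bits per value
    PackedInts.DEFAULT);               // allow modest overhead for speed
packed.set(0, maxValue);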

View File

@ -33,10 +33,8 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.2 FieldInfos writer.
*
* @see Lucene42FieldInfosFormat
* @lucene.experimental
* Writer for 4.2 fieldinfos format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene42FieldInfosWriter extends FieldInfosWriter {

View File

@ -36,9 +36,11 @@ import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT;
/**
* Writer for {@link Lucene42NormsFormat}
* Writer for 4.2 norms format for testing
* @deprecated for test purposes only
*/
class Lucene42NormsConsumer extends NormsConsumer {
@Deprecated
final class Lucene42NormsConsumer extends NormsConsumer {
static final byte NUMBER = 0;
static final int BLOCK_SIZE = 4096;
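BLOCK_SIZE here is the granularity handed to BlockPackedWriter: values are buffered and flushed 4096 at a time, each block packed with just enough bits for that block's range. A minimal sketch of that pattern (dataOutput and the values are placeholders):

BlockPackedWriter writer = new BlockPackedWriter(dataOutput, BLOCK_SIZE);
for (long norm : new long[] {1, 3, 2}) {
  writer.add(norm);          // buffered until a full block accumulates
}
writer.finish();             // flushes the final, possibly partial, block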

View File

@ -28,13 +28,13 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
import org.apache.lucene.util.LuceneTestCase;
/**
* Read-write version of {@link Lucene42Codec} for testing.
* Read-write version of 4.2 codec for testing
* @deprecated for test purposes only
*/
@SuppressWarnings("deprecation")
public class Lucene42RWCodec extends Lucene42Codec {
@Deprecated
public final class Lucene42RWCodec extends Lucene42Codec {
private static final DocValuesFormat dv = new Lucene42RWDocValuesFormat();
private static final NormsFormat norms = new Lucene42RWNormsFormat();

View File

@ -21,13 +21,13 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.LuceneTestCase;
/**
* Read-write version of {@link Lucene42DocValuesFormat} for testing.
* Read-write version of 4.2 docvalues format for testing
* @deprecated for test purposes only
*/
@SuppressWarnings("deprecation")
public class Lucene42RWDocValuesFormat extends Lucene42DocValuesFormat {
@Deprecated
public final class Lucene42RWDocValuesFormat extends Lucene42DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {

View File

@ -21,12 +21,13 @@ import java.io.IOException;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.LuceneTestCase;
/**
* Read-write version of {@link Lucene42NormsFormat}
* Read-write version of 4.2 norms format for testing
* @deprecated for test purposes only
*/
public class Lucene42RWNormsFormat extends Lucene42NormsFormat {
@Deprecated
public final class Lucene42RWNormsFormat extends Lucene42NormsFormat {
@Override
public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {

View File

@ -24,8 +24,12 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/** read-write version of lucene 4.2 term vectors for testing */
public class Lucene42RWTermVectorsFormat extends Lucene42TermVectorsFormat {
/**
* Read-write version of 4.2 term vectors format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene42RWTermVectorsFormat extends Lucene42TermVectorsFormat {
@Override
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {

View File

@ -58,8 +58,10 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
* writer for Lucene 4.2 term vectors for testing
* Writer for 4.2 term vectors format for testing
* @deprecated for test purposes only
*/
@Deprecated
final class Lucene42TermVectorsWriter extends TermVectorsWriter {
// hard limit on the maximum number of documents per chunk

View File

@ -37,7 +37,7 @@ import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
* Read-write version of {@link Lucene45Codec} for testing.
*/
@SuppressWarnings("deprecation")
public class Lucene45RWCodec extends Lucene45Codec {
public final class Lucene45RWCodec extends Lucene45Codec {
private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() {
@Override

View File

@ -24,9 +24,11 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
/**
* Read-write version of {@link Lucene45DocValuesFormat} for testing.
* Read-write version of 4.5 docvalues format for testing
* @deprecated for test purposes only
*/
public class Lucene45RWDocValuesFormat extends Lucene45DocValuesFormat {
@Deprecated
public final class Lucene45RWDocValuesFormat extends Lucene45DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {

View File

@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene45;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase;
import org.junit.BeforeClass;
/**
* Tests Lucene45DocValuesFormat

View File

@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene46;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
@ -29,10 +28,11 @@ import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
import org.apache.lucene.codecs.lucene45.Lucene45RWDocValuesFormat;
/**
* Read-write version of {@link Lucene46Codec} for testing.
* Read-write version of 4.6 codec for testing
* @deprecated for test purposes only
*/
@SuppressWarnings("deprecation")
public class Lucene46RWCodec extends Lucene46Codec {
@Deprecated
public final class Lucene46RWCodec extends Lucene46Codec {
private static final DocValuesFormat docValues = new Lucene45RWDocValuesFormat();

View File

@ -19,8 +19,12 @@ package org.apache.lucene.codecs.lucene46;
import org.apache.lucene.codecs.SegmentInfoWriter;
/** read-write version of 4.6 segmentinfos for testing */
public class Lucene46RWSegmentInfoFormat extends Lucene46SegmentInfoFormat {
/**
* Read-write version of 4.6 segmentinfo format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene46RWSegmentInfoFormat extends Lucene46SegmentInfoFormat {
@Override
public SegmentInfoWriter getSegmentInfoWriter() {
return new Lucene46SegmentInfoWriter();

View File

@ -31,12 +31,11 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
* Lucene 4.0 implementation of {@link SegmentInfoWriter}.
*
* @see Lucene46SegmentInfoFormat
* @lucene.experimental
* Writer for 4.6 segmentinfo format for testing
* @deprecated for test purposes only
*/
public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
@Deprecated
final class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
/** Sole constructor. */
public Lucene46SegmentInfoWriter() {

View File

@ -27,10 +27,11 @@ import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
import org.apache.lucene.codecs.lucene46.Lucene46RWSegmentInfoFormat;
/**
* Read-write version of {@link Lucene49Codec} for testing.
* Read-write version of 4.9 codec for testing
* @deprecated for test purposes only
*/
@SuppressWarnings("deprecation")
public class Lucene49RWCodec extends Lucene49Codec {
@Deprecated
public final class Lucene49RWCodec extends Lucene49Codec {
private static final DocValuesFormat docValues = new Lucene49RWDocValuesFormat();

View File

@ -22,10 +22,13 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.LuceneTestCase;
/** Read-write version of {@link Lucene49DocValuesFormat} for testing */
public class Lucene49RWDocValuesFormat extends Lucene49DocValuesFormat {
/**
* Read-write version of 4.9 docvalues format for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene49RWDocValuesFormat extends Lucene49DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {

View File

@ -107,22 +107,34 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
ChecksumIndexInput indexStream = null;
int version = -1;
long maxPointer = -1;
CompressingStoredFieldsIndexReader indexReader = null;
// Load the index into memory
final String indexName = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
try (ChecksumIndexInput indexStream = d.openChecksumInput(indexName, context)) {
Throwable priorE = null;
try {
final String codecNameIdx = formatName + CODEC_SFX_IDX;
version = CodecUtil.checkSegmentHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId());
assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
maxPointer = indexStream.readVLong();
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(indexStream, priorE);
}
}
this.version = version;
this.maxPointer = maxPointer;
this.indexReader = indexReader;
final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
try {
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
// Load the index into memory
indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
version = CodecUtil.checkSegmentHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId());
assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
maxPointer = indexStream.readVLong();
CodecUtil.checkFooter(indexStream);
indexStream.close();
indexStream = null;
// Open the data file and read metadata
fieldsStream = d.openInput(fieldsStreamFN, context);
if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) {
@ -149,7 +161,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this, indexStream);
IOUtils.closeWhileHandlingException(this);
}
}
}
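This constructor (and the term vectors reader below) now follows the same safety idiom: open the checksummed index in try-with-resources, trap any decode-time failure, and route it through CodecUtil.checkFooter, which verifies the checksum and, when an exception is pending, rethrows it annotated with whether the bytes were actually corrupt. Distilled to a sketch, with placeholder file and codec names:

try (ChecksumIndexInput in = dir.openChecksumInput("example.idx", context)) {
  Throwable priorE = null;
  try {
    CodecUtil.checkHeader(in, "ExampleCodec", VERSION_START, VERSION_CURRENT);
    // ... decode the index contents ...
  } catch (Throwable exception) {
    priorE = exception;                // keep the original failure
  } finally {
    CodecUtil.checkFooter(in, priorE); // checksum is validated either way
  }
}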

View File

@ -105,21 +105,30 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
ChecksumIndexInput indexStream = null;
try {
// Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
version = CodecUtil.checkSegmentHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId());
assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
indexStream.readVLong(); // the end of the data file
CodecUtil.checkFooter(indexStream);
indexStream.close();
indexStream = null;
int version = -1;
CompressingStoredFieldsIndexReader indexReader = null;
// Load the index into memory
final String indexName = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
try (ChecksumIndexInput input = d.openChecksumInput(indexName, context)) {
Throwable priorE = null;
try {
final String codecNameIdx = formatName + CODEC_SFX_IDX;
version = CodecUtil.checkSegmentHeader(input, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId());
assert CodecUtil.segmentHeaderLength(codecNameIdx) == input.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(input, si);
input.readVLong(); // the end of the data file
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
}
this.version = version;
this.indexReader = indexReader;
try {
// Open the data file and read metadata
final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
vectorsStream = d.openInput(vectorsStreamFN, context);
@ -146,7 +155,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this, indexStream);
IOUtils.closeWhileHandlingException(this);
}
}
}

View File

@ -18,9 +18,6 @@ package org.apache.lucene.codecs.compressing;
*/
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentReader;
@ -35,7 +32,7 @@ class MatchingReaders {
* vectors may be bulk merged. */
final boolean[] matchingReaders;
/** How many {@link #matchingSegmentReaders} are set. */
/** How many {@link #matchingReaders} are set. */
final int count;
MatchingReaders(MergeState mergeState) {
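MatchingReaders precomputes, per incoming segment, whether the incoming format matches the one being written, so a merge can bulk-copy compressed data instead of decoding and re-encoding it. A hypothetical sketch of how a stored-fields merge might consult it (copyCompressedChunks and copyDocByDoc are placeholder names, not real methods):

MatchingReaders matching = new MatchingReaders(mergeState);
for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
  if (matching.matchingReaders[i]) {
    copyCompressedChunks(mergeState.storedFieldsReaders[i]); // fast bulk path
  } else {
    copyDocByDoc(mergeState.storedFieldsReaders[i]);         // generic path
  }
}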

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@ -46,14 +45,19 @@ public class MergeState {
/** {@link FieldInfos} of the newly merged segment. */
public FieldInfos mergeFieldInfos;
/** Stored field producers being merged */
public final StoredFieldsReader[] storedFieldsReaders;
/** Term vector producers being merged */
public final TermVectorsReader[] termVectorsReaders;
/** Norms producers being merged */
public final NormsProducer[] normsProducers;
/** DocValues producers being merged */
public final DocValuesProducer[] docValuesProducers;
/** FieldInfos being merged */
public final FieldInfos[] fieldInfos;
/** Live docs for each reader */

View File

@ -232,8 +232,6 @@ public class SolrIndexConfig {
iwc.setMergedSegmentWarmer(warmer);
}
iwc.setCheckIntegrityAtMerge(checkIntegrityAtMerge);
return iwc;
}

View File

@ -78,18 +78,6 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
}
@Test
public void testCheckIntegrityAtMerge() throws Exception {
SolrConfig solrConfig = new SolrConfig("solr" + File.separator
+ "collection1", "solrconfig-indexconfig.xml", null);
SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
assertNotNull(solrIndexConfig.checkIntegrityAtMerge);
assertTrue(solrIndexConfig.checkIntegrityAtMerge);
IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
assertTrue(iwc.getCheckIntegrityAtMerge());
}
public void testMergedSegmentWarmerIndexConfigCreation() throws Exception {
SolrConfig solrConfig = new SolrConfig("solr" + File.separator
+ "collection1", "solrconfig-warmer.xml", null);