LUCENE-5969: remove some cruft, header -> segmentHeader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1628024 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-09-27 23:18:35 +00:00
parent 1de80597a1
commit d3eb93047b
9 changed files with 49 additions and 98 deletions
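
For readers skimming the diff below: "header -> segmentHeader" means that every file these formats write is now stamped with the unique ID of its segment (SegmentInfo.getId()) in addition to the usual codec magic, name and version. With that in place, the VERSION_BIG_CHUNKS and VERSION_CHECKSUM back-compat branches can go away, because the reworked format restarts its version numbering at VERSION_START. A minimal sketch of the two header flavors, assuming CodecUtil's usual magic/name/version framing (illustration only; the branch's CodecUtil is authoritative):

    import java.io.IOException;

    import org.apache.lucene.store.DataOutput;

    final class SegmentHeaderSketch {
      static final int CODEC_MAGIC = 0x3fd76c17; // CodecUtil.CODEC_MAGIC

      // Plain codec header: magic, codec name, file format version.
      static void writeHeader(DataOutput out, String codec, int version) throws IOException {
        out.writeInt(CODEC_MAGIC);
        out.writeString(codec);
        out.writeInt(version);
      }

      // Segment header: the plain header plus the segment's unique ID, so a
      // reader can verify that a file belongs to the exact segment it expects.
      static void writeSegmentHeader(DataOutput out, String codec, int version, byte[] segmentID) throws IOException {
        writeHeader(out, codec, version);
        out.writeBytes(segmentID, 0, segmentID.length);
      }
    }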

View File: Lucene41Codec.java

@@ -17,8 +17,6 @@ package org.apache.lucene.codecs.lucene41;
* limitations under the License.
*/
-import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
@@ -28,10 +26,7 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
-import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
@@ -39,9 +34,6 @@ import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
/**
* Implements the Lucene 4.1 index format, with configurable per-field postings formats.
@@ -55,13 +47,7 @@ import org.apache.lucene.store.IOContext;
*/
@Deprecated
public class Lucene41Codec extends Codec {
-// TODO: slightly evil
-private final StoredFieldsFormat fieldsFormat = new CompressingStoredFieldsFormat("Lucene41StoredFields", CompressionMode.FAST, 1 << 14) {
-  @Override
-  public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
-    throw new UnsupportedOperationException("this codec can only be used for reading");
-  }
-};
+private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat();
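
The anonymous subclass deleted above existed only to make the 4.1 stored fields format read-only. The Lucene41StoredFieldsFormat that replaces it is not part of this diff; presumably it keeps the same guard itself, roughly along these lines (hypothetical sketch reusing the names from the removed code; the real class would also need a read path for the old plain-header files, which this omits):

    import java.io.IOException;

    import org.apache.lucene.codecs.StoredFieldsWriter;
    import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
    import org.apache.lucene.codecs.compressing.CompressionMode;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;

    // Hypothetical: the actual class on the branch is not shown in this commit.
    public class Lucene41StoredFieldsFormat extends CompressingStoredFieldsFormat {
      public Lucene41StoredFieldsFormat() {
        super("Lucene41StoredFields", CompressionMode.FAST, 1 << 14);
      }

      // Same guard as the removed anonymous class: 4.1 files can be read but
      // never written by current code.
      @Override
      public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
        throw new UnsupportedOperationException("this codec can only be used for reading");
      }
    }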

View File: CompressingStoredFieldsFormat.java

@@ -64,7 +64,7 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
* <p>
* <code>formatName</code> is the name of the format. This name will be used
* in the file formats to perform
- * {@link CodecUtil#checkHeader(org.apache.lucene.store.DataInput, String, int, int) codec header checks}.
+ * {@link CodecUtil#checkSegmentHeader(org.apache.lucene.store.DataInput, String, int, int, byte[]) codec header checks}.
* <p>
* <code>segmentSuffix</code> is the segment suffix. This suffix is added to
* the result file name only if it's not the empty string.
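
As the updated javadoc notes, formatName is what feeds the header checks. For reference, the reader and writer below derive one codec name per file of the pair from it (values taken from this patch):

    // CODEC_SFX_IDX = "Index" and CODEC_SFX_DAT = "Data" in CompressingStoredFieldsWriter,
    // so e.g. "Lucene41StoredFields" yields "Lucene41StoredFieldsIndex" for the
    // index file and "Lucene41StoredFieldsData" for the data file.
    final String codecNameIdx = formatName + CODEC_SFX_IDX;
    final String codecNameDat = formatName + CODEC_SFX_DAT;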

View File: CompressingStoredFieldsReader.java

@@ -27,8 +27,6 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.STRING;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
-import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS;
-import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_EXTENSION;
@@ -116,54 +114,37 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
// Load the index into memory
indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
-version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
-assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
+version = CodecUtil.checkSegmentHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId());
+assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
long maxPointer = -1;
-if (version >= VERSION_CHECKSUM) {
-  maxPointer = indexStream.readVLong();
-  CodecUtil.checkFooter(indexStream);
-} else {
-  CodecUtil.checkEOF(indexStream);
-}
+maxPointer = indexStream.readVLong();
+CodecUtil.checkFooter(indexStream);
indexStream.close();
indexStream = null;
// Open the data file and read metadata
fieldsStream = d.openInput(fieldsStreamFN, context);
-if (version >= VERSION_CHECKSUM) {
-  if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) {
-    throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.length(), fieldsStream);
-  }
-} else {
-  maxPointer = fieldsStream.length();
-}
+if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) {
+  throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.length(), fieldsStream);
+}
this.maxPointer = maxPointer;
final String codecNameDat = formatName + CODEC_SFX_DAT;
-final int fieldsVersion = CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+final int fieldsVersion = CodecUtil.checkSegmentHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT, si.getId());
if (version != fieldsVersion) {
throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion, fieldsStream);
}
-assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
+assert CodecUtil.segmentHeaderLength(codecNameDat) == fieldsStream.getFilePointer();
-if (version >= VERSION_BIG_CHUNKS) {
-  chunkSize = fieldsStream.readVInt();
-} else {
-  chunkSize = -1;
-}
+chunkSize = fieldsStream.readVInt();
packedIntsVersion = fieldsStream.readVInt();
decompressor = compressionMode.newDecompressor();
this.bytes = new BytesRef();
-if (version >= VERSION_CHECKSUM) {
-  // NOTE: data file is too costly to verify checksum against all the bytes on open,
-  // but for now we at least verify proper structure of the checksum footer: which looks
-  // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
-  // such as file truncation.
-  CodecUtil.retrieveChecksum(fieldsStream);
-}
+// NOTE: data file is too costly to verify checksum against all the bytes on open,
+// but for now we at least verify proper structure of the checksum footer: which looks
+// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+// such as file truncation.
+CodecUtil.retrieveChecksum(fieldsStream);
success = true;
} finally {
@@ -310,7 +291,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
}
final DataInput documentInput;
-if (version >= VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize) {
+if (totalLength >= 2 * chunkSize) {
assert chunkSize > 0;
assert offset < chunkSize;
@@ -497,7 +478,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
void decompress() throws IOException {
// decompress data
final int chunkSize = chunkSize();
-if (version >= VERSION_BIG_CHUNKS && chunkSize >= 2 * CompressingStoredFieldsReader.this.chunkSize) {
+if (chunkSize >= 2 * CompressingStoredFieldsReader.this.chunkSize) {
bytes.offset = bytes.length = 0;
for (int decompressed = 0; decompressed < chunkSize; ) {
final int toDecompress = Math.min(chunkSize - decompressed, CompressingStoredFieldsReader.this.chunkSize);
@@ -519,10 +500,8 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
* Check integrity of the data. The iterator is not usable after this method has been called.
*/
void checkIntegrity() throws IOException {
-if (version >= VERSION_CHECKSUM) {
-  fieldsStream.seek(fieldsStream.length() - CodecUtil.footerLength());
-  CodecUtil.checkFooter(fieldsStream);
-}
+fieldsStream.seek(fieldsStream.length() - CodecUtil.footerLength());
+CodecUtil.checkFooter(fieldsStream);
}
}
@@ -539,9 +518,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
@Override
public void checkIntegrity() throws IOException {
-if (version >= VERSION_CHECKSUM) {
-  CodecUtil.checksumEntireFile(fieldsStream);
-}
+CodecUtil.checksumEntireFile(fieldsStream);
}
@Override
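
Every checkHeader call above became checkSegmentHeader. That method's implementation is not part of this diff; presumably it reuses the plain header check and then compares the stamped segment ID, roughly like this sketch (illustration only; the branch's CodecUtil is the real source):

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.store.DataInput;

    final class CheckSegmentHeaderSketch {
      static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, byte[] expectedID) throws IOException {
        // Magic, codec name and version bounds are validated exactly as before...
        final int version = CodecUtil.checkHeader(in, codec, minVersion, maxVersion);
        // ...then the segment ID written by writeSegmentHeader must match.
        final byte[] actualID = new byte[expectedID.length];
        in.readBytes(actualID, 0, actualID.length);
        if (!Arrays.equals(expectedID, actualID)) {
          throw new CorruptIndexException("file belongs to a different segment: expected id=" + Arrays.toString(expectedID) + ", got=" + Arrays.toString(actualID), in);
        }
        return version;
      }
    }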

View File: CompressingStoredFieldsWriter.java

@@ -73,9 +73,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
static final String CODEC_SFX_IDX = "Index";
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
-static final int VERSION_BIG_CHUNKS = 1;
-static final int VERSION_CHECKSUM = 2;
-static final int VERSION_CURRENT = VERSION_CHECKSUM;
+static final int VERSION_CURRENT = VERSION_START;
private final Directory directory;
private final String segment;
@@ -118,10 +116,10 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
-CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
-CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
-assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
-assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
+CodecUtil.writeSegmentHeader(indexStream, codecNameIdx, VERSION_CURRENT, si.getId());
+CodecUtil.writeSegmentHeader(fieldsStream, codecNameDat, VERSION_CURRENT, si.getId());
+assert CodecUtil.segmentHeaderLength(codecNameDat) == fieldsStream.getFilePointer();
+assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
indexStream = null;
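
Taken together with the reader changes above, the writer now establishes a simple invariant for a segment's file pair (layout sketched from this patch plus the javadoc below; angle-bracket names are placeholders):

    .fdx (index): segmentHeader(<formatName> + "Index", VERSION_CURRENT, si.getId()), ChunkIndex, footer
    .fdt (data):  segmentHeader(<formatName> + "Data",  VERSION_CURRENT, si.getId()), chunks..., footer

Both files carry the same version and the same segment ID, and the reader rejects the pair on any mismatch, so an index file from one write can no longer be silently combined with a data file from another.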

View File: CompressingTermVectorsFormat.java

@@ -46,7 +46,7 @@ public class CompressingTermVectorsFormat extends TermVectorsFormat {
* <p>
* <code>formatName</code> is the name of the format. This name will be used
* in the file formats to perform
- * {@link CodecUtil#checkHeader(org.apache.lucene.store.DataInput, String, int, int) codec header checks}.
+ * {@link CodecUtil#checkSegmentHeader(org.apache.lucene.store.DataInput, String, int, int, byte[]) codec header checks}.
* <p>
* The <code>compressionMode</code> parameter allows you to choose between
* compression algorithms that have various compression and decompression

View File: CompressingTermVectorsReader.java

@@ -28,7 +28,6 @@ import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
-import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM;
import java.io.Closeable;
import java.io.IOException;
@@ -112,16 +111,12 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
-version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
-assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
+version = CodecUtil.checkSegmentHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId());
+assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
-if (version >= VERSION_CHECKSUM) {
-  indexStream.readVLong(); // the end of the data file
-  CodecUtil.checkFooter(indexStream);
-} else {
-  CodecUtil.checkEOF(indexStream);
-}
+indexStream.readVLong(); // the end of the data file
+CodecUtil.checkFooter(indexStream);
indexStream.close();
indexStream = null;
@@ -129,21 +124,19 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
vectorsStream = d.openInput(vectorsStreamFN, context);
final String codecNameDat = formatName + CODEC_SFX_DAT;
-int version2 = CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+int version2 = CodecUtil.checkSegmentHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT, si.getId());
if (version != version2) {
throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + version2, vectorsStream);
}
-assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+assert CodecUtil.segmentHeaderLength(codecNameDat) == vectorsStream.getFilePointer();
long pos = vectorsStream.getFilePointer();
-if (version >= VERSION_CHECKSUM) {
-  // NOTE: data file is too costly to verify checksum against all the bytes on open,
-  // but for now we at least verify proper structure of the checksum footer: which looks
-  // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
-  // such as file truncation.
-  CodecUtil.retrieveChecksum(vectorsStream);
-  vectorsStream.seek(pos);
-}
+// NOTE: data file is too costly to verify checksum against all the bytes on open,
+// but for now we at least verify proper structure of the checksum footer: which looks
+// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+// such as file truncation.
+CodecUtil.retrieveChecksum(vectorsStream);
+vectorsStream.seek(pos);
packedIntsVersion = vectorsStream.readVInt();
chunkSize = vectorsStream.readVInt();
@@ -1078,9 +1071,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
@Override
public void checkIntegrity() throws IOException {
-if (version >= VERSION_CHECKSUM) {
-  CodecUtil.checksumEntireFile(vectorsStream);
-}
+CodecUtil.checksumEntireFile(vectorsStream);
}
@Override

View File: CompressingTermVectorsWriter.java

@@ -68,8 +68,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
-static final int VERSION_CHECKSUM = 1;
-static final int VERSION_CURRENT = VERSION_CHECKSUM;
+static final int VERSION_CURRENT = VERSION_START;
static final int BLOCK_SIZE = 64;
@@ -231,10 +230,10 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
-CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
-CodecUtil.writeHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
-assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
-assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
+CodecUtil.writeSegmentHeader(indexStream, codecNameIdx, VERSION_CURRENT, si.getId());
+CodecUtil.writeSegmentHeader(vectorsStream, codecNameDat, VERSION_CURRENT, si.getId());
+assert CodecUtil.segmentHeaderLength(codecNameDat) == vectorsStream.getFilePointer();
+assert CodecUtil.segmentHeaderLength(codecNameIdx) == indexStream.getFilePointer();
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
indexStream = null;

View File: stored fields format javadoc (.fdt/.fdx)

@@ -52,7 +52,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>Here is a more detailed description of the field data file format:</p>
* <ul>
* <li>FieldData (.fdt) --&gt; &lt;Header&gt;, PackedIntsVersion, &lt;Chunk&gt;<sup>ChunkCount</sup></li>
- * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Header --&gt; {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
* <li>PackedIntsVersion --&gt; {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
* <li>ChunkCount is not known in advance and is the number of chunks necessary to store all documents of the segment</li>
* <li>Chunk --&gt; DocBase, ChunkDocs, DocFieldCounts, DocLengths, &lt;CompressedDocs&gt;</li>
@@ -104,7 +104,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>A fields index file (extension <tt>.fdx</tt>).</p>
* <ul>
* <li>FieldsIndex (.fdx) --&gt; &lt;Header&gt;, &lt;ChunkIndex&gt;</li>
- * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Header --&gt; {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
* <li>ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}</li>
* </ul>
* </li>

View File: term vectors format javadoc (.tvd/.tvx)

@@ -59,7 +59,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>Here is a more detailed description of the field data file format:</p>
* <ul>
* <li>VectorData (.tvd) --&gt; &lt;Header&gt;, PackedIntsVersion, ChunkSize, &lt;Chunk&gt;<sup>ChunkCount</sup>, Footer</li>
- * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Header --&gt; {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
* <li>PackedIntsVersion --&gt; {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
* <li>ChunkSize is the number of bytes of terms to accumulate before flushing, as a {@link DataOutput#writeVInt VInt}</li>
* <li>ChunkCount is not known in advance and is the number of chunks necessary to store all documents of the segment</li>
@@ -113,7 +113,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>An index file (extension <tt>.tvx</tt>).</p>
* <ul>
* <li>VectorIndex (.tvx) --&gt; &lt;Header&gt;, &lt;ChunkIndex&gt;, Footer</li>
- * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Header --&gt; {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
* <li>ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
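
To close the loop, a hypothetical caller-side sketch (not from the patch) of what the segment ID buys: opening a stored fields index file and proving it was written for the segment at hand. The helper, the literal file name and the codec name (formatName + "Index", as in the reader above) are illustrative:

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;

    final class SegmentIdCheckExample {
      // Throws CorruptIndexException if "_0.fdx" was stamped with a different
      // segment ID than si.getId(), something a plain codec header cannot detect.
      static void verifyFieldsIndex(Directory dir, SegmentInfo si) throws IOException {
        try (ChecksumIndexInput in = dir.openChecksumInput("_0.fdx", IOContext.READONCE)) {
          // After this commit, VERSION_START == VERSION_CURRENT == 0 for stored fields.
          CodecUtil.checkSegmentHeader(in, "Lucene41StoredFieldsIndex", 0, 0, si.getId());
        }
      }
    }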