mirror of https://github.com/apache/lucene.git
LUCENE-2446: add checksums to index files
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1583550 13f79535-47bb-0310-9956-ffa450edef68
commit c189d0fb74 (parent aacd7ee80f)
CHANGES.txt
@@ -138,6 +138,13 @@ New Features
 * LUCENE-5558: Add TruncateTokenFilter which truncates terms to
   the specified length. (Ahmet Arslan via Robert Muir)
 
+* LUCENE-2446: Added checksums to lucene index files. As of 4.8, the last 8
+  bytes of each file contain a zlib-crc32 checksum. Small metadata files are
+  verified on load. Larger files can be checked on demand via
+  AtomicReader.checkIntegrity. You can configure this to happen automatically
+  before merges by enabling IndexWriterConfig.setCheckIntegrityAtMerge.
+  (Robert Muir)
+
 API Changes
 
 * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
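Editorial note: the entry above is the whole user-facing surface of this change. A minimal usage sketch (not part of the commit; the index path and analyzer are placeholders, everything else is 4.8 API):

    import java.io.File;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path

    // On-demand check of the larger files in every segment:
    DirectoryReader reader = DirectoryReader.open(dir);
    for (AtomicReaderContext ctx : reader.leaves()) {
      ctx.reader().checkIntegrity(); // throws CorruptIndexException on a bad checksum
    }
    reader.close();

    // Opt in to verifying segments before they are merged:
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48,
        new StandardAnalyzer(Version.LUCENE_48)); // placeholder analyzer
    iwc.setCheckIntegrityAtMerge(true);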
BlockTermsReader.java
@@ -177,7 +177,10 @@ public class BlockTermsReader extends FieldsProducer {
   }
 
   private void seekDir(IndexInput input, long dirOffset) throws IOException {
-    if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
+    if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+      input.seek(input.length() - CodecUtil.footerLength() - 8);
+      dirOffset = input.readLong();
+    } else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
       input.seek(input.length() - 8);
       dirOffset = input.readLong();
     }
@@ -863,4 +866,14 @@ public class BlockTermsReader extends FieldsProducer {
     sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0;
     return sizeInBytes;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    // verify terms
+    if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(in);
+    }
+    // verify postings
+    postingsReader.checkIntegrity();
+  }
 }
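Editorial note, inferred from the seekDir change above rather than stated in the commit: checksummed files keep their trailing 8-byte directory offset, but it now sits just in front of the new codec footer, so readers back up over the footer before reading it. Sketched:

    // Tail of a VERSION_CHECKSUM terms file:
    //   ... terms data ... | dirOffset (8-byte long) | footer (CodecUtil.footerLength() bytes)
    input.seek(input.length() - CodecUtil.footerLength() - 8);
    long dirOffset = input.readLong(); // then seek(dirOffset) to read the field directory

    // Tail of a pre-checksum (VERSION_APPEND_ONLY) file: the long is simply last.
    input.seek(input.length() - 8);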
BlockTermsWriter.java
@@ -63,12 +63,13 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
   public static final int VERSION_START = 0;
   public static final int VERSION_APPEND_ONLY = 1;
   public static final int VERSION_META_ARRAY = 2;
-  public static final int VERSION_CURRENT = VERSION_META_ARRAY;
+  public static final int VERSION_CHECKSUM = 3;
+  public static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   /** Extension of terms file */
   static final String TERMS_EXTENSION = "tib";
 
-  protected final IndexOutput out;
+  protected IndexOutput out;
   final PostingsWriterBase postingsWriter;
   final FieldInfos fieldInfos;
   FieldInfo currentField;
@@ -176,26 +177,30 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
   }
 
   public void close() throws IOException {
-    try {
-      final long dirStart = out.getFilePointer();
-
-      out.writeVInt(fields.size());
-      for(FieldMetaData field : fields) {
-        out.writeVInt(field.fieldInfo.number);
-        out.writeVLong(field.numTerms);
-        out.writeVLong(field.termsStartPointer);
-        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-          out.writeVLong(field.sumTotalTermFreq);
-        }
-        out.writeVLong(field.sumDocFreq);
-        out.writeVInt(field.docCount);
-        if (VERSION_CURRENT >= VERSION_META_ARRAY) {
-          out.writeVInt(field.longsSize);
-        }
-      }
-      writeTrailer(dirStart);
-    } finally {
-      IOUtils.close(out, postingsWriter, termsIndexWriter);
-    }
+    if (out != null) {
+      try {
+        final long dirStart = out.getFilePointer();
+
+        out.writeVInt(fields.size());
+        for(FieldMetaData field : fields) {
+          out.writeVInt(field.fieldInfo.number);
+          out.writeVLong(field.numTerms);
+          out.writeVLong(field.termsStartPointer);
+          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+            out.writeVLong(field.sumTotalTermFreq);
+          }
+          out.writeVLong(field.sumDocFreq);
+          out.writeVInt(field.docCount);
+          if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+            out.writeVInt(field.longsSize);
+          }
+        }
+        writeTrailer(dirStart);
+        CodecUtil.writeFooter(out);
+      } finally {
+        IOUtils.close(out, postingsWriter, termsIndexWriter);
+        out = null;
+      }
+    }
   }
FixedGapTermsIndexReader.java
@@ -66,6 +66,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
   // start of the field info data
   private long dirOffset;
 
+  private int version;
+
   public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
     throws IOException {
 
@@ -78,6 +80,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
     try {
 
       readHeader(in);
+
+      if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(in);
+      }
 
       indexInterval = in.readVInt();
       if (indexInterval < 1) {
         throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
@@ -124,7 +131,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
   }
 
   private void readHeader(IndexInput input) throws IOException {
-    CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+    version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
       FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT);
   }
 
@@ -273,7 +280,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
   public void close() throws IOException {}
 
   private void seekDir(IndexInput input, long dirOffset) throws IOException {
-    input.seek(input.length() - 8);
+    if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+      input.seek(input.length() - CodecUtil.footerLength() - 8);
+    } else {
+      input.seek(input.length() - 8);
+    }
     dirOffset = input.readLong();
     input.seek(dirOffset);
   }
FixedGapTermsIndexWriter.java
@@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -43,7 +42,7 @@ import java.io.IOException;
 *
 * @lucene.experimental */
 public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
-  protected final IndexOutput out;
+  protected IndexOutput out;
 
   /** Extension of terms index file */
   static final String TERMS_INDEX_EXTENSION = "tii";
@@ -52,7 +51,8 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
   final static int VERSION_START = 0;
   final static int VERSION_APPEND_ONLY = 1;
   final static int VERSION_MONOTONIC_ADDRESSING = 2;
-  final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING;
+  final static int VERSION_CHECKSUM = 3;
+  final static int VERSION_CURRENT = VERSION_CHECKSUM;
 
   final static int BLOCKSIZE = 4096;
   final private int termIndexInterval;
@@ -207,38 +207,42 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
 
   @Override
   public void close() throws IOException {
-    boolean success = false;
-    try {
-      final long dirStart = out.getFilePointer();
-      final int fieldCount = fields.size();
-
-      int nonNullFieldCount = 0;
-      for(int i=0;i<fieldCount;i++) {
-        SimpleFieldWriter field = fields.get(i);
-        if (field.numIndexTerms > 0) {
-          nonNullFieldCount++;
-        }
-      }
-
-      out.writeVInt(nonNullFieldCount);
-      for(int i=0;i<fieldCount;i++) {
-        SimpleFieldWriter field = fields.get(i);
-        if (field.numIndexTerms > 0) {
-          out.writeVInt(field.fieldInfo.number);
-          out.writeVInt(field.numIndexTerms);
-          out.writeVLong(field.termsStart);
-          out.writeVLong(field.indexStart);
-          out.writeVLong(field.packedIndexStart);
-          out.writeVLong(field.packedOffsetsStart);
-        }
-      }
-      writeTrailer(dirStart);
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(out);
-      } else {
-        IOUtils.closeWhileHandlingException(out);
-      }
-    }
+    if (out != null) {
+      boolean success = false;
+      try {
+        final long dirStart = out.getFilePointer();
+        final int fieldCount = fields.size();
+
+        int nonNullFieldCount = 0;
+        for(int i=0;i<fieldCount;i++) {
+          SimpleFieldWriter field = fields.get(i);
+          if (field.numIndexTerms > 0) {
+            nonNullFieldCount++;
+          }
+        }
+
+        out.writeVInt(nonNullFieldCount);
+        for(int i=0;i<fieldCount;i++) {
+          SimpleFieldWriter field = fields.get(i);
+          if (field.numIndexTerms > 0) {
+            out.writeVInt(field.fieldInfo.number);
+            out.writeVInt(field.numIndexTerms);
+            out.writeVLong(field.termsStart);
+            out.writeVLong(field.indexStart);
+            out.writeVLong(field.packedIndexStart);
+            out.writeVLong(field.packedOffsetsStart);
+          }
+        }
+        writeTrailer(dirStart);
+        CodecUtil.writeFooter(out);
+        success = true;
+      } finally {
+        if (success) {
+          IOUtils.close(out);
+        } else {
+          IOUtils.closeWhileHandlingException(out);
+        }
+        out = null;
+      }
+    }
   }
VariableGapTermsIndexReader.java
@@ -62,6 +62,10 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
     try {
 
       version = readHeader(in);
+
+      if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(in);
+      }
 
       seekDir(in, dirOffset);
 
@@ -190,7 +194,10 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
   public void close() throws IOException {}
 
   private void seekDir(IndexInput input, long dirOffset) throws IOException {
-    if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+    if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
+      input.seek(input.length() - CodecUtil.footerLength() - 8);
+      dirOffset = input.readLong();
+    } else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
       input.seek(input.length() - 8);
       dirOffset = input.readLong();
     }
VariableGapTermsIndexWriter.java
@@ -45,7 +45,7 @@ import org.apache.lucene.util.fst.Util;
 *
 * @lucene.experimental */
 public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
-  protected final IndexOutput out;
+  protected IndexOutput out;
 
   /** Extension of terms index file */
   static final String TERMS_INDEX_EXTENSION = "tiv";
@@ -53,7 +53,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
   final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX";
   final static int VERSION_START = 0;
   final static int VERSION_APPEND_ONLY = 1;
-  final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
+  final static int VERSION_CHECKSUM = 2;
+  final static int VERSION_CURRENT = VERSION_CHECKSUM;
 
   private final List<FSTFieldWriter> fields = new ArrayList<>();
 
@@ -290,30 +291,34 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
 
   @Override
   public void close() throws IOException {
-    try {
-      final long dirStart = out.getFilePointer();
-      final int fieldCount = fields.size();
-
-      int nonNullFieldCount = 0;
-      for(int i=0;i<fieldCount;i++) {
-        FSTFieldWriter field = fields.get(i);
-        if (field.fst != null) {
-          nonNullFieldCount++;
-        }
-      }
-
-      out.writeVInt(nonNullFieldCount);
-      for(int i=0;i<fieldCount;i++) {
-        FSTFieldWriter field = fields.get(i);
-        if (field.fst != null) {
-          out.writeVInt(field.fieldInfo.number);
-          out.writeVLong(field.indexStart);
-        }
-      }
-      writeTrailer(dirStart);
-    } finally {
-      out.close();
-    }
+    if (out != null) {
+      try {
+        final long dirStart = out.getFilePointer();
+        final int fieldCount = fields.size();
+
+        int nonNullFieldCount = 0;
+        for(int i=0;i<fieldCount;i++) {
+          FSTFieldWriter field = fields.get(i);
+          if (field.fst != null) {
+            nonNullFieldCount++;
+          }
+        }
+
+        out.writeVInt(nonNullFieldCount);
+        for(int i=0;i<fieldCount;i++) {
+          FSTFieldWriter field = fields.get(i);
+          if (field.fst != null) {
+            out.writeVInt(field.fieldInfo.number);
+            out.writeVLong(field.indexStart);
+          }
+        }
+        writeTrailer(dirStart);
+        CodecUtil.writeFooter(out);
+      } finally {
+        out.close();
+        out = null;
+      }
+    }
   }
 
   private void writeTrailer(long dirStart) throws IOException {
BloomFilteringPostingsFormat.java
@@ -39,8 +39,8 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -66,7 +66,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
 * </p>
 * <ul>
 * <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
- * NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
+ * NumFilteredFields, Filter<sup>NumFilteredFields</sup>, Footer</li>
 * <li>Filter --> FieldNumber, FuzzySet</li>
 * <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li>
 * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
@@ -75,13 +75,16 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
 * <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li>
 * <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
 * field in this segment</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * @lucene.experimental
 */
 public final class BloomFilteringPostingsFormat extends PostingsFormat {
 
   public static final String BLOOM_CODEC_NAME = "BloomFilter";
-  public static final int BLOOM_CODEC_VERSION = 1;
+  public static final int VERSION_START = 1;
+  public static final int VERSION_CHECKSUM = 2;
+  public static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   /** Extension of Bloom Filters file */
   static final String BLOOM_EXTENSION = "blm";
@@ -157,12 +160,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
 
       String bloomFileName = IndexFileNames.segmentFileName(
           state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
-      IndexInput bloomIn = null;
+      ChecksumIndexInput bloomIn = null;
       boolean success = false;
       try {
-        bloomIn = state.directory.openInput(bloomFileName, state.context);
-        CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
-            BLOOM_CODEC_VERSION);
+        bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
+        int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);
         // // Load the hash function used in the BloomFilter
         // hashFunction = HashFunction.forName(bloomIn.readString());
         // Load the delegate postings format
@@ -178,6 +180,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
           FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
           bloomsByFieldName.put(fieldInfo.name, bloom);
         }
+        if (version >= VERSION_CHECKSUM) {
+          CodecUtil.checkFooter(bloomIn);
+        } else {
+          CodecUtil.checkEOF(bloomIn);
+        }
         IOUtils.close(bloomIn);
         success = true;
       } finally {
@@ -390,6 +397,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
       }
       return sizeInBytes;
     }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      delegateFieldsProducer.checkIntegrity();
+    }
   }
 
   class BloomFilteredFieldsConsumer extends FieldsConsumer {
@@ -466,10 +478,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
           state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
       IndexOutput bloomOutput = null;
       try {
-        bloomOutput = state.directory
-            .createOutput(bloomFileName, state.context);
-        CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
-            BLOOM_CODEC_VERSION);
+        bloomOutput = state.directory.createOutput(bloomFileName, state.context);
+        CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT);
         // remember the name of the postings format we will delegate to
         bloomOutput.writeString(delegatePostingsFormat.getName());
 
@@ -481,6 +491,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
         bloomOutput.writeInt(fieldInfo.number);
         saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
       }
+      CodecUtil.writeFooter(bloomOutput);
     } finally {
       IOUtils.close(bloomOutput);
     }
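Editorial note: the Bloom reader shows the load-time pattern this commit repeats across the small metadata readers: open with openChecksumInput so every byte read feeds a running checksum, capture the version from checkHeader, then finish with checkFooter on new files or checkEOF on old ones. A minimal sketch with placeholder CODEC_NAME/VERSION_* constants:

    ChecksumIndexInput in = directory.openChecksumInput(fileName, context); // names are placeholders
    boolean success = false;
    try {
      int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
      // ... read the file's metadata here ...
      if (version >= VERSION_CHECKSUM) {
        CodecUtil.checkFooter(in);  // recompute and compare the zlib-crc32 footer
      } else {
        CodecUtil.checkEOF(in);     // old files: just verify every byte was consumed
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }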
DirectDocValuesConsumer.java
@@ -40,7 +40,7 @@ import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
 */
 
 class DirectDocValuesConsumer extends DocValuesConsumer {
-  final IndexOutput data, meta;
+  IndexOutput data, meta;
   final int maxDoc;
 
   DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
@@ -142,6 +142,10 @@ class DirectDocValuesConsumer extends DocValuesConsumer {
     try {
       if (meta != null) {
         meta.writeVInt(-1); // write EOF marker
+        CodecUtil.writeFooter(meta); // write checksum
+      }
+      if (data != null) {
+        CodecUtil.writeFooter(data);
       }
       success = true;
     } finally {
@@ -150,6 +154,7 @@ class DirectDocValuesConsumer extends DocValuesConsumer {
       } else {
        IOUtils.closeWhileHandlingException(data, meta);
       }
+      data = meta = null;
     }
   }
 
DirectDocValuesProducer.java
@@ -33,6 +33,7 @@ import org.apache.lucene.index.RandomAccessOrds;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -65,6 +66,7 @@ class DirectDocValuesProducer extends DocValuesProducer {
 
   private final int maxDoc;
   private final AtomicLong ramBytesUsed;
+  private final int version;
 
   static final byte NUMBER = 0;
   static final byte BYTES = 1;
@@ -72,22 +74,27 @@ class DirectDocValuesProducer extends DocValuesProducer {
   static final byte SORTED_SET = 3;
 
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_CHECKSUM = 1;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     maxDoc = state.segmentInfo.getDocCount();
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
     // read in the entries from the metadata file.
-    IndexInput in = state.directory.openInput(metaName, state.context);
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
     ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
     boolean success = false;
-    final int version;
     try {
       version = CodecUtil.checkHeader(in, metaCodec,
                                       VERSION_START,
                                       VERSION_CURRENT);
       readFields(in);
+
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(in);
+      } else {
+        CodecUtil.checkEOF(in);
+      }
       success = true;
     } finally {
       if (success) {
@@ -185,6 +192,13 @@ class DirectDocValuesProducer extends DocValuesProducer {
     return ramBytesUsed.get();
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(data);
+    }
+  }
+
   @Override
   public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
     NumericDocValues instance = numericInstances.get(field.number);
DirectPostingsFormat.java
@@ -109,6 +109,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     if (state.context.context != IOContext.Context.MERGE) {
       FieldsProducer loadedPostings;
       try {
+        postings.checkIntegrity();
         loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff);
       } finally {
         postings.close();
@@ -157,6 +158,12 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
       return sizeInBytes;
     }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      // if we read entirely into ram, we already validated.
+      // otherwise returned the raw postings reader
+    }
   }
 
   private final static class DirectField extends Terms {
FSTOrdTermsReader.java
@@ -38,6 +38,7 @@ import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -56,14 +57,13 @@ import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader;
 
 /**
 * FST-based terms dictionary reader.
 *
 * The FST index maps each term and its ord, and during seek
 * the ord is used fetch metadata from a single block.
- * The term dictionary is fully memeory resident.
+ * The term dictionary is fully memory resident.
 *
 * @lucene.experimental
 */
@@ -71,8 +71,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
   static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
   final TreeMap<String, TermsReader> fields = new TreeMap<>();
   final PostingsReaderBase postingsReader;
-  IndexInput indexIn = null;
-  IndexInput blockIn = null;
+  int version;
   //static final boolean TEST = false;
 
   public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
@@ -80,11 +79,18 @@ public class FSTOrdTermsReader extends FieldsProducer {
     final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
 
     this.postingsReader = postingsReader;
+    ChecksumIndexInput indexIn = null;
+    IndexInput blockIn = null;
+    boolean success = false;
     try {
-      this.indexIn = state.directory.openInput(termsIndexFileName, state.context);
-      this.blockIn = state.directory.openInput(termsBlockFileName, state.context);
-      readHeader(indexIn);
+      indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context);
+      blockIn = state.directory.openInput(termsBlockFileName, state.context);
+      version = readHeader(indexIn);
       readHeader(blockIn);
+      if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(blockIn);
+      }
 
       this.postingsReader.init(blockIn);
       seekDir(blockIn);
@@ -100,12 +106,22 @@ public class FSTOrdTermsReader extends FieldsProducer {
         int longsSize = blockIn.readVInt();
         FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
 
-        TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
+        TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
         TermsReader previous = fields.put(fieldInfo.name, current);
-        checkFieldSummary(state.segmentInfo, current, previous);
+        checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous);
       }
+      if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(indexIn);
+      } else {
+        CodecUtil.checkEOF(indexIn);
+      }
+      success = true;
     } finally {
-      IOUtils.closeWhileHandlingException(indexIn, blockIn);
+      if (success) {
+        IOUtils.close(indexIn, blockIn);
+      } else {
+        IOUtils.closeWhileHandlingException(indexIn, blockIn);
+      }
     }
   }
 
@@ -115,10 +131,14 @@ public class FSTOrdTermsReader extends FieldsProducer {
                                 FSTOrdTermsWriter.TERMS_VERSION_CURRENT);
   }
   private void seekDir(IndexInput in) throws IOException {
-    in.seek(in.length() - 8);
+    if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+      in.seek(in.length() - CodecUtil.footerLength() - 8);
+    } else {
+      in.seek(in.length() - 8);
+    }
     in.seek(in.readLong());
   }
-  private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
+  private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
     // #docs with field must be <= #docs
     if (field.docCount < 0 || field.docCount > info.getDocCount()) {
       throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")");
@@ -176,7 +196,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
     final byte[] metaLongsBlock;
     final byte[] metaBytesBlock;
 
-    TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
+    TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
       this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
       this.sumTotalTermFreq = sumTotalTermFreq;
@@ -819,4 +839,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
     }
     return ramBytesUsed;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    postingsReader.checkIntegrity();
+  }
 }
FSTOrdTermsWriter.java
@@ -73,9 +73,10 @@ import org.apache.lucene.util.fst.Util;
 * </p>
 *
 * <ul>
- * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
+ * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li>
 * <li>TermFST --> {@link FST FST<long>}</li>
 * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 *
 * <p>Notes:</p>
@@ -103,7 +104,7 @@ import org.apache.lucene.util.fst.Util;
 * <ul>
 * <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
 * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- *                         DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
+ *                         DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li>
 *
 * <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
 *                       SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
@@ -119,6 +120,7 @@ import org.apache.lucene.util.fst.Util;
 * <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
 *       StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
 *       LongDelta,--> {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * <p>Notes: </p>
 * <ul>
@@ -148,7 +150,8 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
   static final String TERMS_BLOCK_EXTENSION = "tbk";
   static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
   public static final int TERMS_VERSION_START = 0;
-  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+  public static final int TERMS_VERSION_CHECKSUM = 1;
+  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
   public static final int SKIP_INTERVAL = 8;
 
   final PostingsWriterBase postingsWriter;
@@ -218,36 +221,41 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
   }
 
   public void close() throws IOException {
-    IOException ioe = null;
-    try {
-      final long blockDirStart = blockOut.getFilePointer();
-
-      // write field summary
-      blockOut.writeVInt(fields.size());
-      for (FieldMetaData field : fields) {
-        blockOut.writeVInt(field.fieldInfo.number);
-        blockOut.writeVLong(field.numTerms);
-        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-          blockOut.writeVLong(field.sumTotalTermFreq);
-        }
-        blockOut.writeVLong(field.sumDocFreq);
-        blockOut.writeVInt(field.docCount);
-        blockOut.writeVInt(field.longsSize);
-        blockOut.writeVLong(field.statsOut.getFilePointer());
-        blockOut.writeVLong(field.metaLongsOut.getFilePointer());
-        blockOut.writeVLong(field.metaBytesOut.getFilePointer());
-
-        field.skipOut.writeTo(blockOut);
-        field.statsOut.writeTo(blockOut);
-        field.metaLongsOut.writeTo(blockOut);
-        field.metaBytesOut.writeTo(blockOut);
-        field.dict.save(indexOut);
-      }
-      writeTrailer(blockOut, blockDirStart);
-    } catch (IOException ioe2) {
-      ioe = ioe2;
-    } finally {
-      IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
-    }
+    if (blockOut != null) {
+      IOException ioe = null;
+      try {
+        final long blockDirStart = blockOut.getFilePointer();
+
+        // write field summary
+        blockOut.writeVInt(fields.size());
+        for (FieldMetaData field : fields) {
+          blockOut.writeVInt(field.fieldInfo.number);
+          blockOut.writeVLong(field.numTerms);
+          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+            blockOut.writeVLong(field.sumTotalTermFreq);
+          }
+          blockOut.writeVLong(field.sumDocFreq);
+          blockOut.writeVInt(field.docCount);
+          blockOut.writeVInt(field.longsSize);
+          blockOut.writeVLong(field.statsOut.getFilePointer());
+          blockOut.writeVLong(field.metaLongsOut.getFilePointer());
+          blockOut.writeVLong(field.metaBytesOut.getFilePointer());
+
+          field.skipOut.writeTo(blockOut);
+          field.statsOut.writeTo(blockOut);
+          field.metaLongsOut.writeTo(blockOut);
+          field.metaBytesOut.writeTo(blockOut);
+          field.dict.save(indexOut);
+        }
+        writeTrailer(blockOut, blockDirStart);
+        CodecUtil.writeFooter(indexOut);
+        CodecUtil.writeFooter(blockOut);
+      } catch (IOException ioe2) {
+        ioe = ioe2;
+      } finally {
+        IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
+        blockOut = null;
+      }
    }
   }
 
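Editorial note: every writer in this commit gets the same two mechanical edits, visible again above: close() appends CodecUtil.writeFooter(...) after the existing trailer, and the output field loses its final modifier so close() can null it and become idempotent. Reduced to its skeleton:

    public void close() throws IOException {
      if (out != null) {              // a second close() is a no-op
        try {
          // ... write the directory/trailer as before ...
          CodecUtil.writeFooter(out); // footer magic + algorithm id + zlib-crc32 of the file
        } finally {
          IOUtils.close(out);
          out = null;
        }
      }
    }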
FSTTermsReader.java
@@ -59,7 +59,7 @@ import org.apache.lucene.codecs.CodecUtil;
 * FST-based terms dictionary reader.
 *
 * The FST directly maps each term and its metadata,
- * it is memeory resident.
+ * it is memory resident.
 *
 * @lucene.experimental
 */
@@ -67,18 +67,21 @@ import org.apache.lucene.codecs.CodecUtil;
 public class FSTTermsReader extends FieldsProducer {
   final TreeMap<String, TermsReader> fields = new TreeMap<>();
   final PostingsReaderBase postingsReader;
-  final IndexInput in;
   //static boolean TEST = false;
+  final int version;
 
   public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
     final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
 
     this.postingsReader = postingsReader;
-    this.in = state.directory.openInput(termsFileName, state.context);
+    final IndexInput in = state.directory.openInput(termsFileName, state.context);
 
     boolean success = false;
     try {
-      readHeader(in);
+      version = readHeader(in);
+      if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(in);
+      }
       this.postingsReader.init(in);
       seekDir(in);
 
@@ -92,13 +95,15 @@ public class FSTTermsReader extends FieldsProducer {
         long sumDocFreq = in.readVLong();
         int docCount = in.readVInt();
         int longsSize = in.readVInt();
-        TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
+        TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
         TermsReader previous = fields.put(fieldInfo.name, current);
-        checkFieldSummary(state.segmentInfo, current, previous);
+        checkFieldSummary(state.segmentInfo, in, current, previous);
       }
       success = true;
     } finally {
-      if (!success) {
+      if (success) {
+        IOUtils.close(in);
+      } else {
         IOUtils.closeWhileHandlingException(in);
       }
     }
@@ -110,10 +115,14 @@ public class FSTTermsReader extends FieldsProducer {
                              FSTTermsWriter.TERMS_VERSION_CURRENT);
   }
   private void seekDir(IndexInput in) throws IOException {
-    in.seek(in.length() - 8);
+    if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
+      in.seek(in.length() - CodecUtil.footerLength() - 8);
+    } else {
+      in.seek(in.length() - 8);
+    }
     in.seek(in.readLong());
   }
-  private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
+  private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException {
     // #docs with field must be <= #docs
     if (field.docCount < 0 || field.docCount > info.getDocCount()) {
       throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
@@ -150,7 +159,7 @@ public class FSTTermsReader extends FieldsProducer {
   @Override
   public void close() throws IOException {
     try {
-      IOUtils.close(in, postingsReader);
+      IOUtils.close(postingsReader);
     } finally {
       fields.clear();
     }
@@ -165,7 +174,7 @@ public class FSTTermsReader extends FieldsProducer {
     final int longsSize;
     final FST<FSTTermOutputs.TermData> dict;
 
-    TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
+    TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
       this.sumTotalTermFreq = sumTotalTermFreq;
@@ -729,4 +738,9 @@ public class FSTTermsReader extends FieldsProducer {
     }
     return ramBytesUsed;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    postingsReader.checkIntegrity();
+  }
 }
FSTTermsWriter.java
@@ -124,11 +124,12 @@ public class FSTTermsWriter extends FieldsConsumer {
   static final String TERMS_EXTENSION = "tmp";
   static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
   public static final int TERMS_VERSION_START = 0;
-  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+  public static final int TERMS_VERSION_CHECKSUM = 1;
+  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
 
   final PostingsWriterBase postingsWriter;
   final FieldInfos fieldInfos;
-  final IndexOutput out;
+  IndexOutput out;
   final int maxDoc;
   final List<FieldMetaData> fields = new ArrayList<>();
 
@@ -199,28 +200,32 @@ public class FSTTermsWriter extends FieldsConsumer {
   }
 
   public void close() throws IOException {
-    IOException ioe = null;
-    try {
-      // write field summary
-      final long dirStart = out.getFilePointer();
-
-      out.writeVInt(fields.size());
-      for (FieldMetaData field : fields) {
-        out.writeVInt(field.fieldInfo.number);
-        out.writeVLong(field.numTerms);
-        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-          out.writeVLong(field.sumTotalTermFreq);
-        }
-        out.writeVLong(field.sumDocFreq);
-        out.writeVInt(field.docCount);
-        out.writeVInt(field.longsSize);
-        field.dict.save(out);
-      }
-      writeTrailer(out, dirStart);
-    } catch (IOException ioe2) {
-      ioe = ioe2;
-    } finally {
-      IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
+    if (out != null) {
+      IOException ioe = null;
+      try {
+        // write field summary
+        final long dirStart = out.getFilePointer();
+
+        out.writeVInt(fields.size());
+        for (FieldMetaData field : fields) {
+          out.writeVInt(field.fieldInfo.number);
+          out.writeVLong(field.numTerms);
+          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+            out.writeVLong(field.sumTotalTermFreq);
+          }
+          out.writeVLong(field.sumDocFreq);
+          out.writeVInt(field.docCount);
+          out.writeVInt(field.longsSize);
+          field.dict.save(out);
+        }
+        writeTrailer(out, dirStart);
+        CodecUtil.writeFooter(out);
+      } catch (IOException ioe2) {
+        ioe = ioe2;
+      } finally {
+        IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
+        out = null;
+      }
     }
   }
 
MemoryDocValuesConsumer.java
@@ -59,7 +59,7 @@ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.UNCOMPRESS
 * Writer for {@link MemoryDocValuesFormat}
 */
 class MemoryDocValuesConsumer extends DocValuesConsumer {
-  final IndexOutput data, meta;
+  IndexOutput data, meta;
   final int maxDoc;
   final float acceptableOverheadRatio;
 
@@ -208,6 +208,10 @@ class MemoryDocValuesConsumer extends DocValuesConsumer {
     try {
       if (meta != null) {
         meta.writeVInt(-1); // write EOF marker
+        CodecUtil.writeFooter(meta); // write checksum
+      }
+      if (data != null) {
+        CodecUtil.writeFooter(data);
       }
       success = true;
     } finally {
@@ -216,6 +220,7 @@ class MemoryDocValuesConsumer extends DocValuesConsumer {
       } else {
        IOUtils.closeWhileHandlingException(data, meta);
       }
+      data = meta = null;
     }
   }
 
MemoryDocValuesProducer.java
@@ -37,6 +37,7 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -77,6 +78,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
 
   private final int maxDoc;
   private final AtomicLong ramBytesUsed;
+  private final int version;
 
   static final byte NUMBER = 0;
   static final byte BYTES = 1;
@@ -91,15 +93,15 @@ class MemoryDocValuesProducer extends DocValuesProducer {
 
   static final int VERSION_START = 0;
   static final int VERSION_GCD_COMPRESSION = 1;
-  static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
+  static final int VERSION_CHECKSUM = 2;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     maxDoc = state.segmentInfo.getDocCount();
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
     // read in the entries from the metadata file.
-    IndexInput in = state.directory.openInput(metaName, state.context);
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
     boolean success = false;
-    final int version;
     try {
       version = CodecUtil.checkHeader(in, metaCodec,
                                       VERSION_START,
@@ -108,6 +110,11 @@ class MemoryDocValuesProducer extends DocValuesProducer {
       binaries = new HashMap<>();
       fsts = new HashMap<>();
       readFields(in, state.fieldInfos);
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(in);
+      } else {
+        CodecUtil.checkEOF(in);
+      }
       ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
       success = true;
     } finally {
@@ -208,6 +215,13 @@ class MemoryDocValuesProducer extends DocValuesProducer {
     return ramBytesUsed.get();
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(data);
+    }
+  }
+
   private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
     NumericEntry entry = numerics.get(field.number);
     data.seek(entry.offset + entry.missingBytes);
MemoryPostingsFormat.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
@@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -271,6 +273,9 @@ public final class MemoryPostingsFormat extends PostingsFormat {
   }
 
   private static String EXTENSION = "ram";
+  private static final String CODEC_NAME = "MemoryPostings";
+  private static final int VERSION_START = 0;
+  private static final int VERSION_CURRENT = VERSION_START;
 
   private class MemoryFieldsConsumer extends FieldsConsumer implements Closeable {
     private final SegmentWriteState state;
@@ -279,6 +284,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     private MemoryFieldsConsumer(SegmentWriteState state) throws IOException {
       final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
       out = state.directory.createOutput(fileName, state.context);
+      CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
       this.state = state;
     }
 
@@ -403,6 +409,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       // EOF marker:
       try {
         out.writeVInt(0);
+        CodecUtil.writeFooter(out);
       } finally {
         out.close();
       }
@@ -951,7 +958,8 @@ public final class MemoryPostingsFormat extends PostingsFormat {
   @Override
   public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
-    final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE);
+    final ChecksumIndexInput in = state.directory.openChecksumInput(fileName, IOContext.READONCE);
+    CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
 
     final SortedMap<String,TermsReader> fields = new TreeMap<>();
 
@@ -965,6 +973,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
         // System.out.println("load field=" + termsReader.field.name);
         fields.put(termsReader.field.name, termsReader);
       }
+      CodecUtil.checkFooter(in);
     } finally {
       in.close();
     }
@@ -1002,6 +1011,9 @@ public final class MemoryPostingsFormat extends PostingsFormat {
         }
         return sizeInBytes;
       }
+
+      @Override
+      public void checkIntegrity() throws IOException {}
     };
   }
 }
PulsingPostingsReader.java
@@ -653,4 +653,9 @@ public class PulsingPostingsReader extends PostingsReaderBase {
   public long ramBytesUsed() {
     return ((wrappedPostingsReader!=null) ? wrappedPostingsReader.ramBytesUsed(): 0);
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    wrappedPostingsReader.checkIntegrity();
+  }
 }
SepPostingsReader.java
@@ -706,4 +706,9 @@ public class SepPostingsReader extends PostingsReaderBase {
   public long ramBytesUsed() {
     return 0;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    // TODO: remove sep layout, its fallen behind on features...
+  }
 }
SimpleTextDocValuesReader.java
@@ -17,6 +17,7 @@ package org.apache.lucene.codecs.simpletext;
  * limitations under the License.
  */
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM;
 import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
 import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
 import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
@@ -48,6 +49,8 @@ import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -468,4 +471,19 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
   public long ramBytesUsed() {
     return 0;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    BytesRef scratch = new BytesRef();
+    IndexInput clone = data.clone();
+    clone.seek(0);
+    ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
+    while(true) {
+      SimpleTextUtil.readLine(input, scratch);
+      if (scratch.equals(END)) {
+        SimpleTextUtil.checkFooter(input, CHECKSUM);
+        break;
+      }
+    }
+  }
 }
SimpleTextDocValuesWriter.java
@@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 
 class SimpleTextDocValuesWriter extends DocValuesConsumer {
+  final static BytesRef CHECKSUM = new BytesRef("checksum ");
   final static BytesRef END = new BytesRef("END");
   final static BytesRef FIELD = new BytesRef("field ");
   final static BytesRef TYPE = new BytesRef(" type ");
@@ -49,7 +50,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
   final static BytesRef NUMVALUES = new BytesRef(" numvalues ");
   final static BytesRef ORDPATTERN = new BytesRef(" ordpattern ");
 
-  final IndexOutput data;
+  IndexOutput data;
   final BytesRef scratch = new BytesRef();
   final int numDocs;
   private final Set<String> fieldsSeen = new HashSet<>(); // for asserting
@@ -389,18 +390,25 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
 
   @Override
   public void close() throws IOException {
-    boolean success = false;
-    try {
-      assert !fieldsSeen.isEmpty();
-      // TODO: sheisty to do this here?
-      SimpleTextUtil.write(data, END);
-      SimpleTextUtil.writeNewline(data);
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(data);
-      } else {
-        IOUtils.closeWhileHandlingException(data);
+    if (data != null) {
+      boolean success = false;
+      try {
+        assert !fieldsSeen.isEmpty();
+        // TODO: sheisty to do this here?
+        SimpleTextUtil.write(data, END);
+        SimpleTextUtil.writeNewline(data);
+        String checksum = Long.toString(data.getChecksum());
+        SimpleTextUtil.write(data, CHECKSUM);
+        SimpleTextUtil.write(data, checksum, scratch);
+        SimpleTextUtil.writeNewline(data);
+        success = true;
+      } finally {
+        if (success) {
+          IOUtils.close(data);
+        } else {
+          IOUtils.closeWhileHandlingException(data);
+        }
+        data = null;
       }
     }
   }
SimpleTextFieldInfosReader.java
@@ -24,15 +24,14 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
@@ -50,7 +49,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
   @Override
   public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
-    IndexInput input = directory.openInput(fileName, iocontext);
+    ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
     BytesRef scratch = new BytesRef();
 
     boolean success = false;
@@ -130,9 +129,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
         infos[i].setDocValuesGen(dvGen);
       }
 
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
-      }
+      SimpleTextUtil.checkFooter(input, CHECKSUM);
 
       FieldInfos fieldInfos = new FieldInfos(infos);
       success = true;
SimpleTextFieldInfosWriter.java
@@ -58,6 +58,7 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
   static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
   final static BytesRef ATT_KEY = new BytesRef(" key ");
   final static BytesRef ATT_VALUE = new BytesRef(" value ");
+  final static BytesRef CHECKSUM = new BytesRef("checksum ");
 
   @Override
   public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
@@ -132,6 +133,10 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
           }
         }
       }
+      String checksum = Long.toString(out.getChecksum());
+      SimpleTextUtil.write(out, CHECKSUM);
+      SimpleTextUtil.write(out, checksum, scratch);
+      SimpleTextUtil.writeNewline(out);
       success = true;
     } finally {
       if (success) {
SimpleTextFieldsReader.java
@@ -34,6 +34,8 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
@@ -51,22 +53,23 @@ import org.apache.lucene.util.fst.PairOutputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
+
 class SimpleTextFieldsReader extends FieldsProducer {
   private final TreeMap<String,Long> fields;
   private final IndexInput in;
   private final FieldInfos fieldInfos;
   private final int maxDoc;
 
-  final static BytesRef END = SimpleTextFieldsWriter.END;
-  final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
-  final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
-  final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
-  final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
-  final static BytesRef POS = SimpleTextFieldsWriter.POS;
-  final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET;
-  final static BytesRef END_OFFSET = SimpleTextFieldsWriter.END_OFFSET;
-  final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
-
   public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
     this.maxDoc = state.segmentInfo.getDocCount();
     fieldInfos = state.fieldInfos;
@@ -83,16 +86,18 @@ class SimpleTextFieldsReader extends FieldsProducer {
   }
 
   private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
+    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
     BytesRef scratch = new BytesRef(10);
     TreeMap<String,Long> fields = new TreeMap<>();
 
     while (true) {
-      SimpleTextUtil.readLine(in, scratch);
+      SimpleTextUtil.readLine(input, scratch);
       if (scratch.equals(END)) {
+        SimpleTextUtil.checkFooter(input, CHECKSUM);
         return fields;
       } else if (StringHelper.startsWith(scratch, FIELD)) {
         String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8);
-        fields.put(fieldName, in.getFilePointer());
+        fields.put(fieldName, input.getFilePointer());
       }
     }
   }
@@ -669,4 +674,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     }
     return sizeInBytes;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {}
 }
@@ -35,10 +35,11 @@ import org.apache.lucene.util.IOUtils;
 
 class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
 
-  private final IndexOutput out;
+  private IndexOutput out;
   private final BytesRef scratch = new BytesRef(10);
   private final SegmentWriteState writeState;
 
+  final static BytesRef CHECKSUM = new BytesRef("checksum ");
   final static BytesRef END = new BytesRef("END");
   final static BytesRef FIELD = new BytesRef("field ");
   final static BytesRef TERM = new BytesRef(" term ");

@@ -215,11 +216,18 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
 
   @Override
   public void close() throws IOException {
-    try {
-      write(END);
-      newline();
-    } finally {
-      out.close();
+    if (out != null) {
+      try {
+        write(END);
+        newline();
+        String checksum = Long.toString(out.getChecksum());
+        write(CHECKSUM);
+        write(checksum);
+        newline();
+      } finally {
+        out.close();
+        out = null;
+      }
     }
   }
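A side note on the close() rewrite above: out loses its final modifier so it can be nulled inside the finally block, making a second close() call (for instance from exception-handling cleanup) a harmless no-op instead of a double write of the END/checksum trailer. A minimal sketch of this idempotent-close pattern, using a hypothetical TrailerWriter rather than the actual SimpleText class:

    import java.io.Closeable;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;

    // Hypothetical writer showing the idempotent close() used above:
    // the trailer is written at most once, and a second close() is a no-op.
    class TrailerWriter implements Closeable {
      private OutputStream out; // deliberately not final; nulled once closed

      TrailerWriter(OutputStream out) {
        this.out = out;
      }

      @Override
      public void close() throws IOException {
        if (out != null) {                 // second call: nothing left to do
          try {
            out.write("END\n".getBytes(StandardCharsets.UTF_8)); // trailer, once
          } finally {
            out.close();
            out = null;                    // mark closed even if the write failed
          }
        }
      }
    }

The Lucene41PostingsWriter hunks near the end of this commit drop the final modifiers on docOut/posOut/payOut, likely for the same reason.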
@@ -24,9 +24,9 @@ import java.util.Collection;
 import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;

@@ -50,6 +50,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
   final static BytesRef SIZE = new BytesRef("size ");
   final static BytesRef DOC = new BytesRef(" doc ");
   final static BytesRef END = new BytesRef("END");
+  final static BytesRef CHECKSUM = new BytesRef("checksum ");
 
   @Override
   public MutableBits newLiveDocs(int size) throws IOException {

@@ -69,10 +70,10 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
     CharsRef scratchUTF16 = new CharsRef();
 
     String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
-    IndexInput in = null;
+    ChecksumIndexInput in = null;
     boolean success = false;
     try {
-      in = dir.openInput(fileName, context);
+      in = dir.openChecksumInput(fileName, context);
 
       SimpleTextUtil.readLine(in, scratch);
       assert StringHelper.startsWith(scratch, SIZE);

@@ -88,6 +89,8 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
         SimpleTextUtil.readLine(in, scratch);
       }
 
+      SimpleTextUtil.checkFooter(in, CHECKSUM);
+
       success = true;
       return new SimpleTextBits(bits, size);
     } finally {

@@ -127,6 +130,10 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
 
       SimpleTextUtil.write(out, END);
       SimpleTextUtil.writeNewline(out);
+      String checksum = Long.toString(out.getChecksum());
+      SimpleTextUtil.write(out, CHECKSUM);
+      SimpleTextUtil.write(out, checksum, scratch);
+      SimpleTextUtil.writeNewline(out);
       success = true;
     } finally {
       if (success) {
@@ -17,6 +17,7 @@ package org.apache.lucene.codecs.simpletext;
  * limitations under the License.
  */
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM;
 import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
 import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
 import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;

@@ -36,9 +37,9 @@ import java.util.Set;
 import org.apache.lucene.codecs.SegmentInfoReader;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;

@@ -55,7 +56,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
   public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
     BytesRef scratch = new BytesRef();
     String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
-    IndexInput input = directory.openInput(segFileName, context);
+    ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
     boolean success = false;
     try {
       SimpleTextUtil.readLine(input, scratch);

@@ -97,6 +98,8 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
         String fileName = readString(SI_FILE.length, scratch);
         files.add(fileName);
       }
+
+      SimpleTextUtil.checkFooter(input, SI_CHECKSUM);
 
       SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
                                          isCompoundFile, null, diagnostics);
@@ -47,6 +47,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
   final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
   final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
   final static BytesRef SI_FILE = new BytesRef(" file ");
+  final static BytesRef SI_CHECKSUM = new BytesRef(" checksum ");
 
   @Override
   public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {

@@ -55,7 +56,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
     si.addFile(segFileName);
 
     boolean success = false;
     IndexOutput output = dir.createOutput(segFileName, ioContext);
 
     try {
       BytesRef scratch = new BytesRef();

@@ -103,6 +104,11 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
         SimpleTextUtil.writeNewline(output);
       }
     }
+
+      String checksum = Long.toString(output.getChecksum());
+      SimpleTextUtil.write(output, SI_CHECKSUM);
+      SimpleTextUtil.write(output, checksum, scratch);
+      SimpleTextUtil.writeNewline(output);
      success = true;
    } finally {
      if (!success) {
@@ -27,6 +27,8 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;

@@ -79,15 +81,17 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
   // stored fields file in entirety up-front and save the offsets
   // so we can seek to the documents later.
   private void readIndex(int size) throws IOException {
+    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
     offsets = new long[size];
     int upto = 0;
     while (!scratch.equals(END)) {
-      readLine();
+      SimpleTextUtil.readLine(input, scratch);
       if (StringHelper.startsWith(scratch, DOC)) {
-        offsets[upto] = in.getFilePointer();
+        offsets[upto] = input.getFilePointer();
         upto++;
       }
     }
+    SimpleTextUtil.checkFooter(input, CHECKSUM);
     assert upto == offsets.length;
   }
 

@@ -189,6 +193,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
     return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
   }
 
+  private String readString(int offset, BytesRef scratch) {
+    UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
+    return scratchUTF16.toString();
+  }
+
   private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
     return a.length == b.length - bOffset &&
         ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);

@@ -198,4 +207,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
   public long ramBytesUsed() {
     return 0;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {}
 }
@@ -51,13 +51,14 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
   final static BytesRef TYPE_FLOAT = new BytesRef("float");
   final static BytesRef TYPE_DOUBLE = new BytesRef("double");
 
-  final static BytesRef END = new BytesRef("END");
-  final static BytesRef DOC = new BytesRef("doc ");
-  final static BytesRef NUM = new BytesRef(" numfields ");
-  final static BytesRef FIELD = new BytesRef(" field ");
-  final static BytesRef NAME = new BytesRef(" name ");
-  final static BytesRef TYPE = new BytesRef(" type ");
-  final static BytesRef VALUE = new BytesRef(" value ");
+  final static BytesRef CHECKSUM = new BytesRef("checksum ");
+  final static BytesRef END = new BytesRef("END");
+  final static BytesRef DOC = new BytesRef("doc ");
+  final static BytesRef NUM = new BytesRef(" numfields ");
+  final static BytesRef FIELD = new BytesRef(" field ");
+  final static BytesRef NAME = new BytesRef(" name ");
+  final static BytesRef TYPE = new BytesRef(" type ");
+  final static BytesRef VALUE = new BytesRef(" value ");
 
   private final BytesRef scratch = new BytesRef();
 

@@ -171,6 +172,10 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
     }
     write(END);
     newLine();
+    String checksum = Long.toString(out.getChecksum());
+    write(CHECKSUM);
+    write(checksum);
+    newLine();
   }
 
   @Override
@@ -33,6 +33,8 @@ import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;

@@ -82,15 +84,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
   // vectors file in entirety up-front and save the offsets
   // so we can seek to the data later.
   private void readIndex(int maxDoc) throws IOException {
+    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
     offsets = new long[maxDoc];
     int upto = 0;
     while (!scratch.equals(END)) {
-      readLine();
+      SimpleTextUtil.readLine(input, scratch);
       if (StringHelper.startsWith(scratch, DOC)) {
-        offsets[upto] = in.getFilePointer();
+        offsets[upto] = input.getFilePointer();
         upto++;
       }
     }
+    SimpleTextUtil.checkFooter(input, CHECKSUM);
     assert upto == offsets.length;
   }
 

@@ -537,4 +541,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
   public long ramBytesUsed() {
     return 0;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {}
 }
@@ -37,6 +37,7 @@ import org.apache.lucene.util.IOUtils;
  */
 public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
 
+  static final BytesRef CHECKSUM = new BytesRef("checksum ");
   static final BytesRef END = new BytesRef("END");
   static final BytesRef DOC = new BytesRef("doc ");
   static final BytesRef NUMFIELDS = new BytesRef(" numfields ");

@@ -177,6 +178,10 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
     }
     write(END);
     newLine();
+    String checksum = Long.toString(out.getChecksum());
+    write(CHECKSUM);
+    write(checksum);
+    newLine();
   }
 
   @Override
@@ -17,11 +17,16 @@ package org.apache.lucene.codecs.simpletext;
  * limitations under the License.
  */
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM;
+
 import java.io.IOException;
 
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
 
 class SimpleTextUtil {

@@ -67,4 +72,18 @@ class SimpleTextUtil {
     scratch.offset = 0;
     scratch.length = upto;
   }
+
+  public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException {
+    BytesRef scratch = new BytesRef();
+    String expectedChecksum = Long.toString(input.getChecksum());
+    SimpleTextUtil.readLine(input, scratch);
+    assert StringHelper.startsWith(scratch, prefix);
+    String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString();
+    if (!expectedChecksum.equals(actualChecksum)) {
+      throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
+    }
+    if (input.length() != input.getFilePointer()) {
+      throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
+    }
+  }
 }
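checkFooter above reads the running CRC32 off the ChecksumIndexInput just before consuming the final "checksum <decimal>" line, so the recorded value covers every byte that precedes that line. A standalone sketch of the same verification over an in-memory file, assuming pure-ASCII content so character and byte offsets coincide (SimpleText files are ASCII in practice); the class and method names here are illustrative, not Lucene API:

    import java.nio.charset.StandardCharsets;
    import java.util.zip.CRC32;

    final class SimpleTextFooterCheck {
      // Verifies a trailing "checksum NNN" line: the CRC32 of every byte
      // before that line must equal the recorded decimal value.
      static void check(byte[] file) {
        String text = new String(file, StandardCharsets.UTF_8); // assumes ASCII content
        int footerStart = text.lastIndexOf("checksum ");
        if (footerStart < 0) {
          throw new RuntimeException("missing checksum line");
        }
        CRC32 crc = new CRC32();
        crc.update(file, 0, footerStart);   // everything before the footer line
        long expected = crc.getValue();
        String recorded = text.substring(footerStart + "checksum ".length()).trim();
        if (!Long.toString(expected).equals(recorded)) {
          throw new RuntimeException("SimpleText checksum failure: "
              + recorded + " != " + expected);
        }
      }
    }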
@@ -131,6 +131,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
       if (indexVersion != version) {
         throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
       }
 
+      // verify
+      if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(indexIn);
+      }
+
       // Have PostingsReader init itself
       postingsReader.init(in);

@@ -157,7 +162,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
         final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
-        final int longsSize = version >= BlockTreeTermsWriter.TERMS_VERSION_META_ARRAY ? in.readVInt() : 0;
+        final int longsSize = version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
         if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
           throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
         }

@@ -187,9 +192,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
   /** Reads terms file header. */
   private int readHeader(IndexInput input) throws IOException {
     int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
-                          BlockTreeTermsWriter.TERMS_VERSION_START,
-                          BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
-    if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) {
+                          BlockTreeTermsWriter.VERSION_START,
+                          BlockTreeTermsWriter.VERSION_CURRENT);
+    if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
       dirOffset = input.readLong();
     }
     return version;

@@ -198,9 +203,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
   /** Reads index file header. */
   private int readIndexHeader(IndexInput input) throws IOException {
     int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
-                          BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
-                          BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
-    if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+                          BlockTreeTermsWriter.VERSION_START,
+                          BlockTreeTermsWriter.VERSION_CURRENT);
+    if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
       indexDirOffset = input.readLong();
     }
     return version;

@@ -209,7 +214,10 @@ public class BlockTreeTermsReader extends FieldsProducer {
   /** Seek {@code input} to the directory offset. */
   private void seekDir(IndexInput input, long dirOffset)
       throws IOException {
-    if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+    if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+      input.seek(input.length() - CodecUtil.footerLength() - 8);
+      dirOffset = input.readLong();
+    } else if (version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
       input.seek(input.length() - 8);
       dirOffset = input.readLong();
     }

@@ -2977,4 +2985,15 @@ public class BlockTreeTermsReader extends FieldsProducer {
     }
     return sizeInByes;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+      // term dictionary
+      CodecUtil.checksumEntireFile(in);
+
+      // postings
+      postingsReader.checkIntegrity();
+    }
+  }
 }
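The seekDir change above is pure layout arithmetic: checksummed .tim/.tip files now end with [dirOffset: 8 bytes][footer: 16 bytes], so the reader seeks to length() - footerLength() - 8, while older append-only files keep the dirOffset in the final 8 bytes. A small sketch of that arithmetic against a plain file, outside the Lucene Directory API:

    import java.io.IOException;
    import java.io.RandomAccessFile;

    final class DirOffsetReader {
      static final int FOOTER_LENGTH = 16; // magic(4) + algorithmID(4) + checksum(8)

      // Returns the big-endian directory offset. With a footer it sits just
      // before the final 16 bytes; older append-only files end with it directly.
      static long readDirOffset(RandomAccessFile f, boolean hasFooter) throws IOException {
        f.seek(f.length() - 8 - (hasFooter ? FOOTER_LENGTH : 0));
        return f.readLong(); // RandomAccessFile reads big-endian, like DataOutput
      }
    }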
@@ -109,7 +109,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *
  * <ul>
  *    <li>TermsDict (.tim) --> Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
- *                               FieldSummary, DirOffset</li>
+ *                               FieldSummary, DirOffset, Footer</li>
  *    <li>NodeBlock --> (OuterNode | InnerNode)</li>
  *    <li>OuterNode --> EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata</i>><sup>EntryCount</sup></li>
  *    <li>InnerNode --> EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ? ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata ? </i>><sup>EntryCount</sup></li>

@@ -122,6 +122,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *    FieldNumber,RootCodeLength,DocCount --> {@link DataOutput#writeVInt VInt}</li>
  *    <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq -->
  *        {@link DataOutput#writeVLong VLong}</li>
+ *    <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>

@@ -150,12 +151,13 @@ import org.apache.lucene.util.packed.PackedInts;
  * when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
  * <ul>
  *   <li>TermsIndex (.tip) --> Header, FSTIndex<sup>NumFields</sup>
- *                                <IndexStartFP><sup>NumFields</sup>, DirOffset</li>
+ *                                <IndexStartFP><sup>NumFields</sup>, DirOffset, Footer</li>
  *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  *   <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
  *   <li>IndexStartFP --> {@link DataOutput#writeVLong VLong}</li>
  *   <!-- TODO: better describe FST output here -->
  *   <li>FSTIndex --> {@link FST FST<byte[]>}</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>

@@ -178,7 +180,6 @@ import org.apache.lucene.util.packed.PackedInts;
  * @see BlockTreeTermsReader
  * @lucene.experimental
  */
-
 public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
 
   /** Suggested default value for the {@code

@@ -204,33 +205,24 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
   final static String TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT";
 
   /** Initial terms format. */
-  public static final int TERMS_VERSION_START = 0;
+  public static final int VERSION_START = 0;
 
   /** Append-only */
-  public static final int TERMS_VERSION_APPEND_ONLY = 1;
+  public static final int VERSION_APPEND_ONLY = 1;
 
   /** Meta data as array */
-  public static final int TERMS_VERSION_META_ARRAY = 2;
+  public static final int VERSION_META_ARRAY = 2;
+
+  /** checksums */
+  public static final int VERSION_CHECKSUM = 3;
 
   /** Current terms format. */
-  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_META_ARRAY;
+  public static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   /** Extension of terms index file */
   static final String TERMS_INDEX_EXTENSION = "tip";
   final static String TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX";
 
-  /** Initial index format. */
-  public static final int TERMS_INDEX_VERSION_START = 0;
-
-  /** Append-only */
-  public static final int TERMS_INDEX_VERSION_APPEND_ONLY = 1;
-
-  /** Meta data as array */
-  public static final int TERMS_INDEX_VERSION_META_ARRAY = 2;
-
-  /** Current index format. */
-  public static final int TERMS_INDEX_VERSION_CURRENT = TERMS_INDEX_VERSION_META_ARRAY;
-
   private final IndexOutput out;
   private final IndexOutput indexOut;
   final int maxDoc;

@@ -326,12 +318,12 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
 
   /** Writes the terms file header. */
   private void writeHeader(IndexOutput out) throws IOException {
-    CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
+    CodecUtil.writeHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT);
   }
 
   /** Writes the index file header. */
   private void writeIndexHeader(IndexOutput out) throws IOException {
-    CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT);
+    CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT);
   }
 
   /** Writes the terms file trailer. */

@@ -1139,13 +1131,13 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
         }
         out.writeVLong(field.sumDocFreq);
         out.writeVInt(field.docCount);
-        if (TERMS_VERSION_CURRENT >= TERMS_VERSION_META_ARRAY) {
-          out.writeVInt(field.longsSize);
-        }
+        out.writeVInt(field.longsSize);
         indexOut.writeVLong(field.indexStartFP);
       }
       writeTrailer(out, dirStart);
+      CodecUtil.writeFooter(out);
       writeIndexTrailer(indexOut, indexDirStart);
+      CodecUtil.writeFooter(indexOut);
     } catch (IOException ioe2) {
       ioe = ioe2;
     } finally {
@@ -23,8 +23,12 @@ import java.io.IOException;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexFormatTooNewException;
 import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 
 /**

@@ -43,6 +47,10 @@ public final class CodecUtil {
    * Constant to identify the start of a codec header.
    */
   public final static int CODEC_MAGIC = 0x3fd76c17;
+  /**
+   * Constant to identify the start of a codec footer.
+   */
+  public final static int FOOTER_MAGIC = ~CODEC_MAGIC;
 
   /**
    * Writes a codec header, which records both a string to

@@ -150,4 +158,119 @@ public final class CodecUtil {
 
     return actualVersion;
   }
+
+  /**
+   * Writes a codec footer, which records both a checksum
+   * algorithm ID and a checksum. This footer can
+   * be parsed and validated with
+   * {@link #checkFooter(ChecksumIndexInput) checkFooter()}.
+   * <p>
+   * CodecFooter --> Magic,AlgorithmID,Checksum
+   * <ul>
+   *    <li>Magic --> {@link DataOutput#writeInt Uint32}. This
+   *        identifies the start of the footer. It is always {@value #FOOTER_MAGIC}.
+   *    <li>AlgorithmID --> {@link DataOutput#writeInt Uint32}. This
+   *        indicates the checksum algorithm used. Currently this is always 0,
+   *        for zlib-crc32.
+   *    <li>Checksum --> {@link DataOutput#writeLong Uint64}. The
+   *        actual checksum value for all previous bytes in the stream, including
+   *        the bytes from Magic and AlgorithmID.
+   * </ul>
+   *
+   * @param out Output stream
+   * @throws IOException If there is an I/O error writing to the underlying medium.
+   */
+  public static void writeFooter(IndexOutput out) throws IOException {
+    out.writeInt(FOOTER_MAGIC);
+    out.writeInt(0);
+    out.writeLong(out.getChecksum());
+  }
+
+  /**
+   * Computes the length of a codec footer.
+   *
+   * @return length of the entire codec footer.
+   * @see #writeFooter(IndexOutput)
+   */
+  public static int footerLength() {
+    return 16;
+  }
+
+  /**
+   * Validates the codec footer previously written by {@link #writeFooter}.
+   * @return actual checksum value
+   * @throws IOException if the footer is invalid, if the checksum does not match,
+   *                     or if {@code in} is not properly positioned before the footer
+   *                     at the end of the stream.
+   */
+  public static long checkFooter(ChecksumIndexInput in) throws IOException {
+    validateFooter(in);
+    long actualChecksum = in.getChecksum();
+    long expectedChecksum = in.readLong();
+    if (expectedChecksum != actualChecksum) {
+      throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(expectedChecksum) +
+                                      " actual=" + Long.toHexString(actualChecksum) +
+                                      " (resource=" + in + ")");
+    }
+    if (in.getFilePointer() != in.length()) {
+      throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+    }
+    return actualChecksum;
+  }
+
+  /**
+   * Returns (but does not validate) the checksum previously written by {@link #writeFooter}.
+   * @return actual checksum value
+   * @throws IOException if the footer is invalid
+   */
+  public static long retrieveChecksum(IndexInput in) throws IOException {
+    in.seek(in.length() - footerLength());
+    validateFooter(in);
+    return in.readLong();
+  }
+
+  private static void validateFooter(IndexInput in) throws IOException {
+    final int magic = in.readInt();
+    if (magic != FOOTER_MAGIC) {
+      throw new CorruptIndexException("codec footer mismatch: actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC + " (resource: " + in + ")");
+    }
+
+    final int algorithmID = in.readInt();
+    if (algorithmID != 0) {
+      throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID);
+    }
+  }
+
+  /**
+   * Checks that the stream is positioned at the end, and throws exception
+   * if it is not.
+   * @deprecated Use {@link #checkFooter} instead; this should only be used for files without checksums.
+   */
+  @Deprecated
+  public static void checkEOF(IndexInput in) throws IOException {
+    if (in.getFilePointer() != in.length()) {
+      throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+    }
+  }
+
+  /**
+   * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}.
+   * <p>
+   * Note that this method may be slow, as it must process the entire file.
+   * If you just need to extract the checksum value, call {@link #retrieveChecksum}.
+   */
+  public static long checksumEntireFile(IndexInput input) throws IOException {
+    IndexInput clone = input.clone();
+    clone.seek(0);
+    ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
+    assert in.getFilePointer() == 0;
+    final byte[] buffer = new byte[1024];
+    long bytesToRead = in.length() - footerLength();
+    for (long skipped = 0; skipped < bytesToRead; ) {
+      final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length);
+      in.readBytes(buffer, 0, toRead);
+      skipped += toRead;
+    }
+    return checkFooter(in);
+  }
 }
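To make the new footer concrete: writeFooter appends exactly 16 bytes, a big-endian FOOTER_MAGIC (~CODEC_MAGIC), an algorithm ID that is always 0 for zlib-crc32, and the 8-byte CRC32 of everything before it (including the magic and the ID, since getChecksum() is read after they are written). A standalone re-implementation of validateFooter/checkFooter over a byte array, using only the JDK; illustrative, not the Lucene API:

    import java.nio.ByteBuffer;
    import java.util.zip.CRC32;

    final class FooterCheck {
      static final int CODEC_MAGIC = 0x3fd76c17;
      static final int FOOTER_MAGIC = ~CODEC_MAGIC;
      static final int FOOTER_LENGTH = 16;

      // Mirrors validateFooter + checkFooter over an in-memory copy of a file:
      // verify magic and algorithm ID, then compare the stored checksum with a
      // CRC32 of all preceding bytes (which include the magic and the ID).
      static long check(byte[] file) {
        ByteBuffer footer = ByteBuffer.wrap(file, file.length - FOOTER_LENGTH, FOOTER_LENGTH);
        if (footer.getInt() != FOOTER_MAGIC) {
          throw new RuntimeException("codec footer mismatch");
        }
        if (footer.getInt() != 0) { // 0 is zlib-crc32, the only algorithm so far
          throw new RuntimeException("unknown checksum algorithmID");
        }
        CRC32 crc = new CRC32();
        crc.update(file, 0, file.length - 8); // checksum covers magic + algorithmID
        long actual = crc.getValue();
        long expected = footer.getLong();
        if (expected != actual) {
          throw new RuntimeException("checksum failed: expected="
              + Long.toHexString(expected) + " actual=" + Long.toHexString(actual));
        }
        return actual;
      }
    }

Algorithm ID 0 maps to zlib-crc32, which is why plain java.util.zip.CRC32 reproduces the stored value.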
@@ -67,6 +67,15 @@ public abstract class DocValuesProducer implements Closeable {
   /** Returns approximate RAM bytes used */
   public abstract long ramBytesUsed();
 
+  /**
+   * Checks consistency of this producer
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
+
   /**
    * A simple implementation of {@link DocValuesProducer#getDocsWithField} that
    * returns {@code true} if a document has an ordinal >= 0
@@ -39,4 +39,13 @@ public abstract class FieldsProducer extends Fields implements Closeable {
 
   /** Returns approximate RAM bytes used */
   public abstract long ramBytesUsed();
+
+  /**
+   * Checks consistency of this reader.
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
 }
@@ -72,6 +72,15 @@ public abstract class PostingsReaderBase implements Closeable {
   /** Returns approximate RAM bytes used */
   public abstract long ramBytesUsed();
 
+  /**
+   * Checks consistency of this reader.
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
+
   @Override
   public abstract void close() throws IOException;
 }
@@ -43,4 +43,13 @@ public abstract class StoredFieldsReader implements Cloneable, Closeable {
 
   /** Returns approximate RAM bytes used */
   public abstract long ramBytesUsed();
+
+  /**
+   * Checks consistency of this reader.
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
 }
@@ -45,6 +45,15 @@ public abstract class TermVectorsReader implements Cloneable, Closeable {
   /** Returns approximate RAM bytes used */
   public abstract long ramBytesUsed();
 
+  /**
+   * Checks consistency of this reader.
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
+
   /** Create a clone that one caller at a time may use to
    *  read term vectors. */
   @Override
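With the same abstract checkIntegrity() hook now on DocValuesProducer, FieldsProducer, PostingsReaderBase, StoredFieldsReader and TermVectorsReader, a caller can verify a whole segment by fanning out to each producer. A hedged sketch of such a fan-out; the aggregate class is hypothetical, not part of this commit:

    import java.io.IOException;

    import org.apache.lucene.codecs.DocValuesProducer;
    import org.apache.lucene.codecs.FieldsProducer;
    import org.apache.lucene.codecs.StoredFieldsReader;
    import org.apache.lucene.codecs.TermVectorsReader;

    // Hypothetical aggregate: one per-segment integrity pass that delegates to
    // each codec producer gaining checkIntegrity() in this commit.
    final class SegmentIntegrity {
      static void check(FieldsProducer fields,
                        DocValuesProducer docValues,
                        StoredFieldsReader storedFields,
                        TermVectorsReader termVectors) throws IOException {
        fields.checkIntegrity();                          // terms + postings
        if (docValues != null) {
          docValues.checkIntegrity();                     // doc values files
        }
        storedFields.checkIntegrity();                    // stored fields data
        if (termVectors != null) {
          termVectors.checkIntegrity();                   // term vectors data
        }
      }
    }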
@@ -21,6 +21,7 @@ import java.io.Closeable;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.packed.PackedInts;

@@ -52,6 +53,7 @@ import org.apache.lucene.util.packed.PackedInts;
  * <li>AvgChunkSize --> the average size of a chunk of compressed documents, as a {@link DataOutput#writeVLong VLong}</li>
  * <li>BitsPerStartPointerDelta --> number of bits required to represent a delta from the average using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li>
  * <li>StartPointerDeltas --> {@link PackedInts packed} array of BlockChunks elements of BitsPerStartPointerDelta bits each, representing the deltas from the average start pointer using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes</p>
  * <ul>

@@ -198,6 +200,7 @@ public final class CompressingStoredFieldsIndexWriter implements Closeable {
       writeBlock();
     }
     fieldsIndexOut.writeVInt(0); // end marker
+    CodecUtil.writeFooter(fieldsIndexOut);
   }
 
   @Override
@@ -28,6 +28,7 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS;
+import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
 import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;

@@ -48,6 +49,7 @@ import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;

@@ -114,17 +116,20 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     boolean success = false;
     fieldInfos = fn;
     numDocs = si.getDocCount();
-    IndexInput indexStream = null;
+    ChecksumIndexInput indexStream = null;
     try {
       // Load the index into memory
       final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
-      indexStream = d.openInput(indexStreamFN, context);
+      indexStream = d.openChecksumInput(indexStreamFN, context);
       final String codecNameIdx = formatName + CODEC_SFX_IDX;
       version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
       assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
       indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
-      if (indexStream.getFilePointer() != indexStream.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")");
-      }
+
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(indexStream);
+      } else {
+        CodecUtil.checkEOF(indexStream);
+      }
       indexStream.close();
       indexStream = null;

@@ -510,4 +515,11 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     return indexReader.ramBytesUsed();
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(fieldsStream);
+    }
+  }
+
 }
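The openInput-to-openChecksumInput swaps in these readers work because a ChecksumIndexInput keeps a running CRC32 of every byte read through it, so when the reader reaches the footer the expected checksum already exists without a second pass over the file. A concept sketch of such a wrapper over a plain InputStream (the real BufferedChecksumIndexInput wraps an IndexInput; this only illustrates the idea):

    import java.io.FilterInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.zip.CRC32;

    // Concept sketch: an input stream that checksums everything read through it,
    // analogous to what openChecksumInput returns for index files.
    final class ChecksummingInputStream extends FilterInputStream {
      private final CRC32 crc = new CRC32();

      ChecksummingInputStream(InputStream in) {
        super(in);
      }

      @Override
      public int read() throws IOException {
        int b = in.read();
        if (b >= 0) crc.update(b);
        return b;
      }

      @Override
      public int read(byte[] buf, int off, int len) throws IOException {
        int n = in.read(buf, off, len);
        if (n > 0) crc.update(buf, off, n);
        return n;
      }

      long getChecksum() {
        return crc.getValue(); // running CRC32 of all bytes read so far
      }
    }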
@@ -71,7 +71,8 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
   static final String CODEC_SFX_DAT = "Data";
   static final int VERSION_START = 0;
   static final int VERSION_BIG_CHUNKS = 1;
-  static final int VERSION_CURRENT = VERSION_BIG_CHUNKS;
+  static final int VERSION_CHECKSUM = 2;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   private final Directory directory;
   private final String segment;

@@ -106,9 +107,11 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
     this.numBufferedDocs = 0;
 
     boolean success = false;
-    IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
+    IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION),
+                                                     context);
     try {
-      fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
+      fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
+                                            context);
 
       final String codecNameIdx = formatName + CODEC_SFX_IDX;
       final String codecNameDat = formatName + CODEC_SFX_DAT;

@@ -314,6 +317,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
     }
     indexWriter.finish(numDocs);
+    CodecUtil.writeFooter(fieldsStream);
     assert bufferedDocs.length == 0;
   }
 
@@ -28,6 +28,7 @@ import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.
 import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
 import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
 import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM;
 
 import java.io.Closeable;
 import java.io.IOException;

@@ -48,6 +49,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;

@@ -69,6 +71,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
   private final FieldInfos fieldInfos;
   final CompressingStoredFieldsIndexReader indexReader;
   final IndexInput vectorsStream;
+  private final int version;
   private final int packedIntsVersion;
   private final CompressionMode compressionMode;
   private final Decompressor decompressor;

@@ -88,6 +91,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
     this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+    this.version = reader.version;
     this.closed = false;
   }
 

@@ -99,17 +103,20 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
     boolean success = false;
     fieldInfos = fn;
     numDocs = si.getDocCount();
-    IndexInput indexStream = null;
+    ChecksumIndexInput indexStream = null;
     try {
       // Load the index into memory
       final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
-      indexStream = d.openInput(indexStreamFN, context);
+      indexStream = d.openChecksumInput(indexStreamFN, context);
       final String codecNameIdx = formatName + CODEC_SFX_IDX;
-      int version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
+      version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
       assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
       indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
-      if (indexStream.getFilePointer() != indexStream.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")");
-      }
+
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(indexStream);
+      } else {
+        CodecUtil.checkEOF(indexStream);
+      }
       indexStream.close();
       indexStream = null;

@@ -1045,5 +1052,12 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
   public long ramBytesUsed() {
     return indexReader.ramBytesUsed();
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(vectorsStream);
+    }
+  }
 
 }
@@ -66,7 +66,8 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
   static final String CODEC_SFX_DAT = "Data";
 
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_CHECKSUM = 1;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   static final int BLOCK_SIZE = 64;
 

@@ -220,9 +221,11 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
     lastTerm = new BytesRef(ArrayUtil.oversize(30, 1));
 
     boolean success = false;
-    IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
+    IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION),
+                                                     context);
     try {
-      vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
+      vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION),
+                                             context);
 
       final String codecNameIdx = formatName + CODEC_SFX_IDX;
       final String codecNameDat = formatName + CODEC_SFX_DAT;

@@ -659,6 +662,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
       throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
     }
     indexWriter.finish(numDocs);
+    CodecUtil.writeFooter(vectorsStream);
   }
 
   @Override
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.util.Arrays;
 
 import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;

@@ -198,9 +200,12 @@ final class BitVector implements Cloneable, MutableBits {
   // Changed DGaps to encode gaps between cleared bits, not
   // set:
   public final static int VERSION_DGAPS_CLEARED = 1;
+
+  // added checksum
+  public final static int VERSION_CHECKSUM = 2;
 
   // Increment version to change it:
-  public final static int VERSION_CURRENT = VERSION_DGAPS_CLEARED;
+  public final static int VERSION_CURRENT = VERSION_CHECKSUM;
 
   public int getVersion() {
     return version;

@@ -221,6 +226,7 @@ final class BitVector implements Cloneable, MutableBits {
       } else {
         writeBits(output);
       }
+      CodecUtil.writeFooter(output);
       assert verifyCount();
     } finally {
       IOUtils.close(output);

@@ -324,7 +330,7 @@ final class BitVector implements Cloneable, MutableBits {
     <code>d</code>, as written by the {@link #write} method.
     */
   public BitVector(Directory d, String name, IOContext context) throws IOException {
-    IndexInput input = d.openInput(name, context);
+    ChecksumIndexInput input = d.openChecksumInput(name, context);
 
     try {
       final int firstInt = input.readInt();

@@ -334,8 +340,8 @@ final class BitVector implements Cloneable, MutableBits {
         version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_CURRENT);
         size = input.readInt();
       } else {
-        version = VERSION_PRE;
-        size = firstInt;
+        // we started writing full header well before 4.0
+        throw new IndexFormatTooOldException(input.toString(), Integer.toString(firstInt));
       }
       if (size == -1) {
         if (version >= VERSION_DGAPS_CLEARED) {

@@ -351,6 +357,11 @@ final class BitVector implements Cloneable, MutableBits {
         invertAll();
       }
 
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(input);
+      } else {
+        CodecUtil.checkEOF(input);
+      }
       assert verifyCount();
     } finally {
       input.close();
@@ -105,9 +105,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
           default:
             throw new AssertionError();
         }
-        if (input.getFilePointer() != input.length()) {
-          throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
-        }
+        CodecUtil.checkEOF(input);
         success = true;
       } finally {
         if (success) {

@@ -327,9 +325,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
       PagedBytes bytes = new PagedBytes(16);
       bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount());
       final PagedBytes.Reader bytesReader = bytes.freeze(true);
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
-      }
+      CodecUtil.checkEOF(input);
       success = true;
       ramBytesUsed.addAndGet(bytes.ramBytesUsed());
       return new BinaryDocValues() {

@@ -367,12 +363,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
       bytes.copy(data, totalBytes);
       final PagedBytes.Reader bytesReader = bytes.freeze(true);
       final PackedInts.Reader reader = PackedInts.getReader(index);
-      if (data.getFilePointer() != data.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
-      }
-      if (index.getFilePointer() != index.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
-      }
+      CodecUtil.checkEOF(data);
+      CodecUtil.checkEOF(index);
       success = true;
       ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
       return new BinaryDocValues() {

@@ -414,12 +406,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
       bytes.copy(data, fixedLength * (long) valueCount);
       final PagedBytes.Reader bytesReader = bytes.freeze(true);
       final PackedInts.Reader reader = PackedInts.getReader(index);
-      if (data.getFilePointer() != data.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
-      }
-      if (index.getFilePointer() != index.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
-      }
+      CodecUtil.checkEOF(data);
+      CodecUtil.checkEOF(index);
       ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
       success = true;
       return new BinaryDocValues() {

@@ -459,12 +447,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
      bytes.copy(data, totalBytes);
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      final PackedInts.Reader reader = PackedInts.getReader(index);
-      if (data.getFilePointer() != data.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
-      }
-      if (index.getFilePointer() != index.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
-      }
+      CodecUtil.checkEOF(data);
+      CodecUtil.checkEOF(index);
      ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
      success = true;
      return new BinaryDocValues() {

@@ -515,12 +499,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
           default:
             throw new AssertionError();
         }
-        if (data.getFilePointer() != data.length()) {
-          throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
-        }
-        if (index.getFilePointer() != index.length()) {
-          throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
-        }
+        CodecUtil.checkEOF(data);
+        CodecUtil.checkEOF(index);
         success = true;
       } finally {
         if (success) {

@@ -654,4 +634,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
   public long ramBytesUsed() {
     return ramBytesUsed.get();
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+  }
 }
@@ -107,9 +107,7 @@ class Lucene40FieldInfosReader extends FieldInfosReader {
           omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes));
       }
 
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
-      }
+      CodecUtil.checkEOF(input);
       FieldInfos fieldInfos = new FieldInfos(infos);
       success = true;
       return fieldInfos;
@@ -1168,4 +1168,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
     return 0;
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {}
+
 }
@@ -64,9 +64,7 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader {
       input.readStringStringMap(); // read deprecated attributes
       final Set<String> files = input.readStringSet();
 
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
-      }
+      CodecUtil.checkEOF(input);
 
       final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
       si.setFiles(files);
@@ -250,4 +250,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   public long ramBytesUsed() {
     return 0;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {}
 }
@@ -760,5 +760,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos
   public long ramBytesUsed() {
     return 0;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {}
 }
 
@@ -132,6 +132,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   <li>Header, --> {@link CodecUtil#writeHeader CodecHeader}</li>
  *   <li>PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}</li>
  *   <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>
@@ -190,7 +191,7 @@ import org.apache.lucene.util.packed.PackedInts;
  * each packed or VInt block, when the length of document list is larger than packed block size.</p>
  *
  * <ul>
- *   <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup></li>
+ *   <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer</li>
  *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  *   <li>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>,
  *   VIntBlock? </li>
@@ -206,6 +207,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *        -->
  *   {@link DataOutput#writeVInt VInt}</li>
  *   <li>SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>
@@ -273,7 +275,7 @@ import org.apache.lucene.util.packed.PackedInts;
  * <p>The .pos file contains the lists of positions that each term occurs at within documents. It also
  * sometimes stores part of payloads and offsets for speedup.</p>
  * <ul>
- *   <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup></li>
+ *   <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</li>
  *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  *   <li>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>,
  *   VIntBlock? </li>
@@ -283,6 +285,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   <li>PositionDelta, OffsetDelta, OffsetLength -->
  *   {@link DataOutput#writeVInt VInt}</li>
  *   <li>PayloadData --> {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>
@@ -325,13 +328,14 @@ import org.apache.lucene.util.packed.PackedInts;
  * <p>The .pay file will store payloads and offsets associated with certain term-document positions.
  * Some payloads and offsets will be separated out into .pos file, for performance reasons.</p>
  * <ul>
- *   <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup></li>
+ *   <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</li>
  *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  *   <li>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup>
  *   <li>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup>
  *   <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}</li>
  *   <li>SumPayLength --> {@link DataOutput#writeVInt VInt}</li>
  *   <li>PayData --> {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>

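The Footer entries added throughout these format docs all follow the same CodecUtil protocol. As a rough illustration (not part of this commit): a writer emits a header at open and a footer at close, and a reader verifies both. The codec name "DemoCodec" and file name "demo.dat" below are made up for the sketch.

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexOutput;

    public class FooterDemo {
      static final String CODEC = "DemoCodec"; // hypothetical codec name
      static final int VERSION = 0;

      static void write(Directory dir) throws java.io.IOException {
        IndexOutput out = dir.createOutput("demo.dat", IOContext.DEFAULT);
        try {
          CodecUtil.writeHeader(out, CODEC, VERSION);
          out.writeVLong(42);          // ... file body ...
          CodecUtil.writeFooter(out);  // appends footer magic, algorithm id, and CRC32
        } finally {
          out.close();
        }
      }

      static void read(Directory dir) throws java.io.IOException {
        ChecksumIndexInput in = dir.openChecksumInput("demo.dat", IOContext.READONCE);
        try {
          CodecUtil.checkHeader(in, CODEC, VERSION, VERSION);
          in.readVLong();              // ... file body ...
          CodecUtil.checkFooter(in);   // verifies the trailing checksum
        } finally {
          in.close();
        }
      }
    }
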
@@ -35,7 +35,6 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -1547,4 +1546,18 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
     return 0;
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+      if (docIn != null) {
+        CodecUtil.checksumEntireFile(docIn);
+      }
+      if (posIn != null) {
+        CodecUtil.checksumEntireFile(posIn);
+      }
+      if (payIn != null) {
+        CodecUtil.checksumEntireFile(payIn);
+      }
+    }
+  }
 }

@@ -64,11 +64,12 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
   // Increment version to change it
   final static int VERSION_START = 0;
   final static int VERSION_META_ARRAY = 1;
-  final static int VERSION_CURRENT = VERSION_META_ARRAY;
+  final static int VERSION_CHECKSUM = 2;
+  final static int VERSION_CURRENT = VERSION_CHECKSUM;
 
-  final IndexOutput docOut;
-  final IndexOutput posOut;
-  final IndexOutput payOut;
+  IndexOutput docOut;
+  IndexOutput posOut;
+  IndexOutput payOut;
 
   final static IntBlockTermState emptyState = new IntBlockTermState();
   IntBlockTermState lastState;
@@ -113,7 +114,7 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
     super();
 
     docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION),
-                                          state.context);
+                                          state.context);
     IndexOutput posOut = null;
     IndexOutput payOut = null;
     boolean success = false;
@@ -123,7 +124,7 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
       if (state.fieldInfos.hasProx()) {
        posDeltaBuffer = new int[MAX_DATA_SIZE];
         posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
-                                              state.context);
+                                              state.context);
         CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
 
         if (state.fieldInfos.hasPayloads()) {
@@ -144,7 +145,7 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
 
       if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
         payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
-                                              state.context);
+                                              state.context);
         CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
       }
     } else {
@@ -569,6 +570,26 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
 
   @Override
   public void close() throws IOException {
-    IOUtils.close(docOut, posOut, payOut);
+    // TODO: add a finish() at least to PushBase? DV too...?
+    boolean success = false;
+    try {
+      if (docOut != null) {
+        CodecUtil.writeFooter(docOut);
+      }
+      if (posOut != null) {
+        CodecUtil.writeFooter(posOut);
+      }
+      if (payOut != null) {
+        CodecUtil.writeFooter(payOut);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(docOut, posOut, payOut);
+      } else {
+        IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+      }
+      docOut = posOut = payOut = null;
+    }
   }
 }

@@ -68,7 +68,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *   <p>The DocValues metadata or .dvm file.</p>
  *   <p>For DocValues field, this stores metadata, such as the offset into the
  *      DocValues data (.dvd)</p>
- *   <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup></p>
+ *   <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup>,Footer</p>
  *   <ul>
  *     <li>Entry --> NumericEntry | BinaryEntry | SortedEntry</li>
  *     <li>NumericEntry --> DataOffset,CompressionType,PackedVersion</li>
@@ -78,6 +78,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *     <li>DataOffset,DataLength --> {@link DataOutput#writeLong Int64}</li>
  *     <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
  *     <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ *     <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  *   </ul>
  *   <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
  *      and an ordinary NumericEntry for the document-to-ord metadata.</p>
@@ -105,7 +106,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *   <li><a name="dvd" id="dvd"></a>
  *   <p>The DocValues data or .dvd file.</p>
  *   <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
- *   <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup></p>
+ *   <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>,Footer</p>
  *   <ul>
  *     <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li>
  *     <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
@@ -114,6 +115,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *     <li>TableCompressedNumerics --> TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li>
  *     <li>UncompressedNumerics --> {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
  *     <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)}</li>
+ *     <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  *   </ul>
  *   <p>SortedSet entries store the list of ordinals in their BinaryData as a
  *      sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>

@@ -37,6 +37,7 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -64,6 +65,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
   private final Map<Integer,BinaryEntry> binaries;
   private final Map<Integer,FSTEntry> fsts;
   private final IndexInput data;
+  private final int version;
 
   // ram instances we have already loaded
   private final Map<Integer,NumericDocValues> numericInstances =
@@ -89,16 +91,16 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
 
   static final int VERSION_START = 0;
   static final int VERSION_GCD_COMPRESSION = 1;
-  static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
+  static final int VERSION_CHECKSUM = 2;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 
   Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     maxDoc = state.segmentInfo.getDocCount();
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
     // read in the entries from the metadata file.
-    IndexInput in = state.directory.openInput(metaName, state.context);
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
     boolean success = false;
     ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
-    final int version;
     try {
       version = CodecUtil.checkHeader(in, metaCodec,
                                       VERSION_START,
@@ -108,8 +110,10 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
       fsts = new HashMap<>();
       readFields(in, state.fieldInfos);
 
-      if (in.getFilePointer() != in.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(in);
+      } else {
+        CodecUtil.checkEOF(in);
       }
 
       success = true;
@@ -199,6 +203,13 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
     return ramBytesUsed.get();
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(data);
+    }
+  }
+
   private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
     NumericEntry entry = numerics.get(field.number);
     data.seek(entry.offset);

@@ -92,9 +92,7 @@ final class Lucene42FieldInfosReader extends FieldInfosReader {
                                          omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
       }
 
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
-      }
+      CodecUtil.checkEOF(input);
       FieldInfos fieldInfos = new FieldInfos(infos);
       success = true;
       return fieldInfos;

@@ -34,14 +34,12 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 import org.apache.lucene.util.packed.PackedInts;
 
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT;
+
 /**
  * Writer for {@link Lucene42NormsFormat}
  */
-class Lucene42NormsConsumer extends DocValuesConsumer {
-  static final int VERSION_START = 0;
-  static final int VERSION_GCD_COMPRESSION = 1;
-  static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
-
+class Lucene42NormsConsumer extends DocValuesConsumer {
   static final byte NUMBER = 0;
 
   static final int BLOCK_SIZE = 4096;
@@ -51,7 +49,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
   static final byte UNCOMPRESSED = 2;
   static final byte GCD_COMPRESSED = 3;
 
-  final IndexOutput data, meta;
+  IndexOutput data, meta;
   final int maxDoc;
   final float acceptableOverheadRatio;
 
@@ -181,6 +179,10 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
     try {
       if (meta != null) {
         meta.writeVInt(-1); // write EOF marker
+        CodecUtil.writeFooter(meta); // write checksum
+      }
+      if (data != null) {
+        CodecUtil.writeFooter(data); // write checksum
       }
       success = true;
     } finally {
@@ -189,6 +191,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
       } else {
         IOUtils.closeWhileHandlingException(data, meta);
       }
+      meta = data = null;
     }
   }
 

@@ -59,7 +59,7 @@ import org.apache.lucene.util.packed.PackedInts;
  * {@link BlockPackedWriter blocks of packed ints} for positions.</p>
  * <p>Here is a more detailed description of the field data file format:</p>
  * <ul>
- * <li>VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk><sup>ChunkCount</sup></li>
+ * <li>VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk><sup>ChunkCount</sup>, Footer</li>
  * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 * <li>PackedIntsVersion --> {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
 * <li>ChunkSize is the number of bytes of terms to accumulate before flushing, as a {@link DataOutput#writeVInt VInt}</li>
@@ -107,14 +107,16 @@ import org.apache.lucene.util.packed.PackedInts;
 * <li>FieldTermsAndPayLoads --> Terms (Payloads)</li>
 * <li>Terms: term bytes</li>
 * <li>Payloads: payload bytes (if the field has payloads)</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * </li>
 * <li><a name="vector_index" id="vector_index"></a>
 * <p>An index file (extension <tt>.tvx</tt>).</p>
 * <ul>
- * <li>VectorIndex (.tvx) --> <Header>, <ChunkIndex></li>
+ * <li>VectorIndex (.tvx) --> <Header>, <ChunkIndex>, Footer</li>
 * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 * <li>ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * </li>
 * </ol>

@@ -66,7 +66,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
    *  of indirection: docId -> ord. */
   public static final int SORTED_SET_SINGLE_VALUED_SORTED = 1;
 
-  final IndexOutput data, meta;
+  IndexOutput data, meta;
   final int maxDoc;
 
   /** expert: Creates a new writer */
@@ -438,6 +438,10 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
     try {
       if (meta != null) {
         meta.writeVInt(-1); // write EOF marker
+        CodecUtil.writeFooter(meta); // write checksum
+      }
+      if (data != null) {
+        CodecUtil.writeFooter(data); // write checksum
       }
       success = true;
     } finally {
@@ -446,6 +450,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
       } else {
         IOUtils.closeWhileHandlingException(data, meta);
       }
+      meta = data = null;
     }
   }
 }

@@ -89,7 +89,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   <p>The DocValues metadata or .dvm file.</p>
  *   <p>For DocValues field, this stores metadata, such as the offset into the
  *      DocValues data (.dvd)</p>
- *   <p>DocValues metadata (.dvm) --> Header,<Entry><sup>NumFields</sup></p>
+ *   <p>DocValues metadata (.dvm) --> Header,<Entry><sup>NumFields</sup>,Footer</p>
  *   <ul>
  *     <li>Entry --> NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li>
  *     <li>NumericEntry --> GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
@@ -109,6 +109,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *     <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  *     <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --> {@link DataOutput#writeLong Int64}</li>
  *     <li>TableSize --> {@link DataOutput#writeVInt vInt}</li>
+ *     <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  *   </ul>
  *   <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
  *      and an ordinary NumericEntry for the document-to-ord metadata.</p>
@@ -138,10 +139,13 @@ import org.apache.lucene.util.packed.PackedInts;
  *      is written for the addresses.
  *   <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
  *      If its -1, then there are no missing values.
+ *   <p>Checksum contains the CRC32 checksum of all bytes in the .dvm file up
+ *      until the checksum. This is used to verify integrity of the file on opening the
+ *      index.
  *   <li><a name="dvd" id="dvd"></a>
  *   <p>The DocValues data or .dvd file.</p>
  *   <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
- *   <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup></p>
+ *   <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>,Footer</p>
  *   <ul>
  *     <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li>
  *     <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
@@ -150,6 +154,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *     <li>TableCompressedNumerics --> {@link PackedInts PackedInts}</li>
  *     <li>GCDCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
  *     <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
+ *     <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  *   </ul>
  *   <p>SortedSet entries store the list of ordinals in their BinaryData as a
  *      sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
@@ -179,7 +184,8 @@ public final class Lucene45DocValuesFormat extends DocValuesFormat {
   static final String META_EXTENSION = "dvm";
   static final int VERSION_START = 0;
   static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1;
-  static final int VERSION_CURRENT = VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED;
+  static final int VERSION_CHECKSUM = 2;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
   static final byte NUMERIC = 0;
   static final byte BINARY = 1;
   static final byte SORTED = 2;

@@ -50,6 +50,7 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -80,7 +81,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
   protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
     // read in the entries from the metadata file.
-    IndexInput in = state.directory.openInput(metaName, state.context);
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
     this.maxDoc = state.segmentInfo.getDocCount();
     boolean success = false;
     try {
@@ -94,8 +95,10 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
       sortedSets = new HashMap<>();
       readFields(in, state.fieldInfos);
 
-      if (in.getFilePointer() != in.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+      if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(in);
+      } else {
+        CodecUtil.checkEOF(in);
       }
 
       success = true;
@@ -299,6 +302,13 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
     return ramBytesUsed.get();
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(data);
+    }
+  }
+
   LongValues getNumeric(NumericEntry entry) throws IOException {
     final IndexInput data = this.data.clone();
     data.seek(entry.offset);

@@ -32,7 +32,7 @@ import org.apache.lucene.store.DataOutput;
  * <p>
 * <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
 * <p>FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber,
- * FieldBits,DocValuesBits,DocValuesGen,Attributes> <sup>FieldsCount</sup></p>
+ * FieldBits,DocValuesBits,DocValuesGen,Attributes> <sup>FieldsCount</sup>,Footer</p>
 * <p>Data types:
 * <ul>
 *   <li>Header --> {@link CodecUtil#checkHeader CodecHeader}</li>
@@ -42,6 +42,7 @@ import org.apache.lucene.store.DataOutput;
 *   <li>FieldNumber --> {@link DataOutput#writeInt VInt}</li>
 *   <li>Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
 *   <li>DocValuesGen --> {@link DataOutput#writeLong(long) Int64}</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * </p>
 * Field Descriptions:
@@ -113,7 +114,8 @@ public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
   // Codec header
   static final String CODEC_NAME = "Lucene46FieldInfos";
   static final int FORMAT_START = 0;
-  static final int FORMAT_CURRENT = FORMAT_START;
+  static final int FORMAT_CHECKSUM = 1;
+  static final int FORMAT_CURRENT = FORMAT_CHECKSUM;
 
   // Field flags
   static final byte IS_INDEXED = 0x1;

@@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -49,13 +50,13 @@ final class Lucene46FieldInfosReader extends FieldInfosReader {
   @Override
   public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext context) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
-    IndexInput input = directory.openInput(fileName, context);
+    ChecksumIndexInput input = directory.openChecksumInput(fileName, context);
 
     boolean success = false;
     try {
-      CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
-                            Lucene46FieldInfosFormat.FORMAT_START,
-                            Lucene46FieldInfosFormat.FORMAT_CURRENT);
+      int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
+                                               Lucene46FieldInfosFormat.FORMAT_START,
+                                               Lucene46FieldInfosFormat.FORMAT_CURRENT);
 
       final int size = input.readVInt(); //read in the size
       FieldInfo infos[] = new FieldInfo[size];
@@ -91,9 +92,11 @@ final class Lucene46FieldInfosReader extends FieldInfosReader {
                                          omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
         infos[i].setDocValuesGen(dvGen);
       }
-
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+
+      if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
+        CodecUtil.checkFooter(input);
+      } else {
+        CodecUtil.checkEOF(input);
       }
       FieldInfos fieldInfos = new FieldInfos(infos);
       success = true;

@@ -26,9 +26,9 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -81,6 +81,7 @@ final class Lucene46FieldInfosWriter extends FieldInfosWriter {
         output.writeLong(fi.getDocValuesGen());
         output.writeStringStringMap(fi.attributes());
       }
+      CodecUtil.writeFooter(output);
       success = true;
     } finally {
       if (success) {

@@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
  * <p>
 * Files:
 * <ul>
- *   <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
+ *   <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
 * </ul>
 * </p>
 * Data types:
@@ -43,6 +43,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
 *   <li>Files --> {@link DataOutput#writeStringSet Set<String>}</li>
 *   <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
 *   <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * </p>
 * Field Descriptions:
@@ -53,9 +54,6 @@ import org.apache.lucene.store.DataOutput; // javadocs
 *   <li>IsCompoundFile records whether the segment is written as a compound file or
 *       not. If this is -1, the segment is not a compound file. If it is 1, the segment
 *       is a compound file.</li>
- *   <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
- *       until the checksum. This is used to verify integrity of the file on opening the
- *       index.</li>
 *   <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
 *       for each segment it creates. It includes metadata like the current Lucene
 *       version, OS, Java version, why the segment was created (merge, flush,
@@ -89,5 +87,6 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
   public final static String SI_EXTENSION = "si";
   static final String CODEC_NAME = "Lucene46SegmentInfo";
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_CHECKSUM = 1;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 }

@@ -26,9 +26,9 @@ import org.apache.lucene.codecs.SegmentInfoReader;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -46,12 +46,12 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
   @Override
   public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
-    final IndexInput input = dir.openInput(fileName, context);
+    final ChecksumIndexInput input = dir.openChecksumInput(fileName, context);
     boolean success = false;
     try {
-      CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
-                            Lucene46SegmentInfoFormat.VERSION_START,
-                            Lucene46SegmentInfoFormat.VERSION_CURRENT);
+      int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
+                                               Lucene46SegmentInfoFormat.VERSION_START,
+                                               Lucene46SegmentInfoFormat.VERSION_CURRENT);
       final String version = input.readString();
       final int docCount = input.readInt();
       if (docCount < 0) {
@@ -61,8 +61,10 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
       final Map<String,String> diagnostics = input.readStringStringMap();
       final Set<String> files = input.readStringSet();
 
-      if (input.getFilePointer() != input.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+      if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(input);
+      } else {
+        CodecUtil.checkEOF(input);
       }
 
       final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);

@@ -59,7 +59,7 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
       output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
       output.writeStringStringMap(si.getDiagnostics());
       output.writeStringSet(si.files());
-
+      CodecUtil.writeFooter(output);
       success = true;
     } finally {
       if (!success) {

@@ -383,6 +383,9 @@ on multi-valued fields.</li>
 <li>In version 4.5, DocValues were extended to explicitly represent missing values.</li>
 <li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
 allow updating NumericDocValues fields.</li>
+<li>In version 4.8, checksum footers were added to the end of each index file
+for improved data integrity. Specifically, the last 8 bytes of every index file
+contain the zlib-crc32 checksum of the file.</li>
 </ul>
 <a name="Limitations" id="Limitations"></a>
 <h2>Limitations</h2>

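To make the "last 8 bytes" convention concrete, here is a hedged standalone sketch (not from the commit) that recomputes the zlib-crc32 of everything before the trailing 8 bytes and compares it to the stored value; footer details beyond the final checksum are simplified, and the file is assumed to be longer than 8 bytes:

    import java.io.RandomAccessFile;
    import java.util.zip.CRC32;

    public class TailChecksumDemo {
      public static boolean tailMatches(String path) throws Exception {
        try (RandomAccessFile f = new RandomAccessFile(path, "r")) {
          long bodyLen = f.length() - 8;   // everything except the stored checksum
          CRC32 crc = new CRC32();
          byte[] buf = new byte[8192];
          long remaining = bodyLen;
          while (remaining > 0) {
            int n = f.read(buf, 0, (int) Math.min(buf.length, remaining));
            crc.update(buf, 0, n);
            remaining -= n;
          }
          long stored = f.readLong();      // big-endian, matching Lucene's writeLong
          return stored == crc.getValue();
        }
      }
    }
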
@@ -310,6 +310,13 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
       }
       return size;
     }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      for (DocValuesProducer format : formats.values()) {
+        format.checkIntegrity();
+      }
+    }
   }
 
   @Override

@@ -246,6 +246,13 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
       }
       return sizeInBytes;
     }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      for (FieldsProducer producer : formats.values()) {
+        producer.checkIntegrity();
+      }
+    }
   }
 
   @Override

@@ -238,4 +238,13 @@ public abstract class AtomicReader extends IndexReader {
    * synchronization.
    */
   public abstract Bits getLiveDocs();
+
+  /**
+   * Checks consistency of this reader.
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
 }

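A possible caller's view of the new method — a minimal sketch that verifies every segment of an open index (DirectoryReader, leaves(), and FSDirectory are existing Lucene 4.x APIs; the index path argument is illustrative):

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.FSDirectory;
    import java.io.File;

    public class CheckAllSegments {
      public static void main(String[] args) throws Exception {
        DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(args[0])));
        try {
          for (AtomicReaderContext ctx : reader.leaves()) {
            ctx.reader().checkIntegrity(); // throws CorruptIndexException on a bad checksum
          }
        } finally {
          reader.close();
        }
      }
    }
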
@@ -536,6 +536,10 @@ public class CheckIndex {
         reader = new SegmentReader(info, IOContext.DEFAULT);
 
         segInfoStat.openReaderPassed = true;
+
+        if (infoStream != null)
+          infoStream.print(" test: check integrity.........");
+        reader.checkIntegrity();
 
         final int numDocs = reader.numDocs();
         toLoseDocCount = numDocs;

@@ -423,4 +423,9 @@ public class FilterAtomicReader extends AtomicReader {
     return in.getDocsWithField(field);
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    ensureOpen();
+    in.checkIntegrity();
+  }
 }

@@ -2651,7 +2651,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
                                        false, codec, null);
 
     SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
-                                             MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
+                                             MergeState.CheckAbort.NONE, globalFieldNumberMap,
+                                             context, config.getCheckIntegrityAtMerge());
 
     if (!merger.shouldMerge()) {
       return;
@@ -4051,7 +4052,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
     // OneMerge to return a view over the actual segments to merge
     final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
                                                    merge.info.info, infoStream, dirWrapper,
-                                                   checkAbort, globalFieldNumberMap, context);
+                                                   checkAbort, globalFieldNumberMap,
+                                                   context, config.getCheckIntegrityAtMerge());
 
     merge.checkAborted(directory);
 

@@ -110,6 +110,12 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
    *  (set to <code>true</code>). For batch indexing with very large
    *  ram buffers use <code>false</code> */
   public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true;
+
+  /** Default value for calling {@link AtomicReader#checkIntegrity()} before
+   *  merging segments (set to <code>false</code>). You can set this
+   *  to <code>true</code> for additional safety. */
+  public final static boolean DEFAULT_CHECK_INTEGRITY_AT_MERGE = false;
 
   /**
    * Sets the default (for any instance) maximum time to wait for a write lock
   * (in milliseconds).

@@ -97,6 +97,9 @@ public class LiveIndexWriterConfig {
 
   /** True if segment flushes should use compound file format */
   protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
+
+  /** True if merging should check integrity of segments before merge */
+  protected volatile boolean checkIntegrityAtMerge = IndexWriterConfig.DEFAULT_CHECK_INTEGRITY_AT_MERGE;
 
   // used by IndexWriterConfig
   LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
@@ -152,6 +155,7 @@ public class LiveIndexWriterConfig {
     flushPolicy = config.getFlushPolicy();
     perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
     useCompoundFile = config.getUseCompoundFile();
+    checkIntegrityAtMerge = config.getCheckIntegrityAtMerge();
   }
 
   /** Returns the default analyzer to use for indexing documents. */
@@ -475,6 +479,26 @@ public class LiveIndexWriterConfig {
     return useCompoundFile ;
   }
 
+  /**
+   * Sets if {@link IndexWriter} should call {@link AtomicReader#checkIntegrity()}
+   * on existing segments before merging them into a new one.
+   * <p>
+   * Use <code>true</code> to enable this safety check, which can help
+   * reduce the risk of propagating index corruption from older segments
+   * into new ones, at the expense of slower merging.
+   * </p>
+   */
+  public LiveIndexWriterConfig setCheckIntegrityAtMerge(boolean checkIntegrityAtMerge) {
+    this.checkIntegrityAtMerge = checkIntegrityAtMerge;
+    return this;
+  }
+
+  /** Returns true if {@link AtomicReader#checkIntegrity()} is called before
+   *  merging segments. */
+  public boolean getCheckIntegrityAtMerge() {
+    return checkIntegrityAtMerge;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
@@ -499,6 +523,7 @@ public class LiveIndexWriterConfig {
     sb.append("readerPooling=").append(getReaderPooling()).append("\n");
     sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
     sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
+    sb.append("checkIntegrityAtMerge=").append(getCheckIntegrityAtMerge()).append("\n");
     return sb.toString();
   }
 

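Wiring the new option up might look like the following sketch; the analyzer, version constant, and index path are illustrative choices, not mandated by the commit:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import java.io.File;

    public class SafeMergeConfig {
      public static IndexWriter open(File path) throws Exception {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48,
            new StandardAnalyzer(Version.LUCENE_48));
        iwc.setCheckIntegrityAtMerge(true); // verify segment checksums before each merge
        return new IndexWriter(FSDirectory.open(path), iwc);
      }
    }
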
@@ -299,4 +299,12 @@ public class ParallelAtomicReader extends AtomicReader {
     NumericDocValues values = reader == null ? null : reader.getNormValues(field);
     return values;
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    ensureOpen();
+    for (AtomicReader reader : completeReaderSet) {
+      reader.checkIntegrity();
+    }
+  }
 }

@@ -95,14 +95,4 @@ final class SegmentDocValues {
       IOUtils.reThrow(t);
     }
   }
-
-  /** Returns approximate RAM bytes used. */
-  synchronized long ramBytesUsed() {
-    long ramBytesUsed = 0;
-    for (RefCount<DocValuesProducer> dvp : genDVProducers.values()) {
-      ramBytesUsed += dvp.get().ramBytesUsed();
-    }
-    return ramBytesUsed;
-  }
-
 }

@@ -36,11 +36,9 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.ChecksumIndexOutput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.NoSuchDirectoryException;
 import org.apache.lucene.util.IOUtils;
@@ -69,10 +67,10 @@ import org.apache.lucene.util.IOUtils;
  * <p>
 * Files:
 * <ul>
- *   <li><tt>segments.gen</tt>: GenHeader, Generation, Generation
+ *   <li><tt>segments.gen</tt>: GenHeader, Generation, Generation, Footer
 *   <li><tt>segments_N</tt>: Header, Version, NameCounter, SegCount,
 *     <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles><sup>SegCount</sup>,
- *     CommitUserData, Checksum
+ *     CommitUserData, Footer
 * </ul>
 * </p>
 * Data types:
@@ -84,6 +82,7 @@ import org.apache.lucene.util.IOUtils;
 *   <li>SegName, SegCodec --> {@link DataOutput#writeString String}</li>
 *   <li>CommitUserData --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
 *   <li>UpdatesFiles --> {@link DataOutput#writeStringSet(Set) Set<String>}</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * </p>
 * Field Descriptions:
@@ -98,9 +97,6 @@ import org.apache.lucene.util.IOUtils;
 *       there are no deletes. Anything above zero means there are deletes
 *       stored by {@link LiveDocsFormat}.</li>
 *   <li>DeletionCount records the number of deleted documents in this segment.</li>
- *   <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
- *       until the checksum. This is used to verify integrity of the file on opening the
- *       index.</li>
 *   <li>SegCodec is the {@link Codec#getName() name} of the Codec that encoded
 *       this segment.</li>
 *   <li>CommitUserData stores an optional user-supplied opaque
@@ -122,10 +118,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
 
   /** The file format version for the segments_N codec header, since 4.6+. */
   public static final int VERSION_46 = 1;
+
+  /** The file format version for the segments_N codec header, since 4.8+ */
+  public static final int VERSION_48 = 2;
 
-  /** Used for the segments.gen file only!
-   * Whenever you add a new format, make it 1 smaller (negative version logic)! */
-  public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
+  // Used for the segments.gen file only!
+  // Whenever you add a new format, make it 1 smaller (negative version logic)!
+  private static final int FORMAT_SEGMENTS_GEN_47 = -2;
+  private static final int FORMAT_SEGMENTS_GEN_CHECKSUM = -3;
+  private static final int FORMAT_SEGMENTS_GEN_START = FORMAT_SEGMENTS_GEN_47;
+  /** Current format of segments.gen */
+  public static final int FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM;
 
   /** Used to name new segments. */
   public int counter;
@@ -266,6 +269,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
       genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT);
       genOutput.writeLong(generation);
       genOutput.writeLong(generation);
+      CodecUtil.writeFooter(genOutput);
     } finally {
       genOutput.close();
       dir.sync(Collections.singleton(IndexFileNames.SEGMENTS_GEN));
@@ -317,7 +321,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
 
     lastGeneration = generation;
 
-    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
+    ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ);
     try {
       // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
       // to read the magic ourselves.
@@ -326,7 +330,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
         throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
       }
       // 4.0+
-      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_46);
+      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_48);
       version = input.readLong();
       counter = input.readInt();
       int numSegments = input.readInt();
@@ -366,10 +370,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
       }
       userData = input.readStringStringMap();
 
-      final long checksumNow = input.getChecksum();
-      final long checksumThen = input.readLong();
-      if (checksumNow != checksumThen) {
-        throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
+      if (format >= VERSION_48) {
+        CodecUtil.checkFooter(input);
+      } else {
+        final long checksumNow = input.getChecksum();
+        final long checksumThen = input.readLong();
+        if (checksumNow != checksumThen) {
+          throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
+        }
+        CodecUtil.checkEOF(input);
       }
 
       success = true;
@@ -402,7 +411,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
 
   // Only non-null after prepareCommit has been called and
   // before finishCommit is called
-  ChecksumIndexOutput pendingSegnOutput;
+  IndexOutput pendingSegnOutput;
 
   private void write(Directory directory) throws IOException {
 
@@ -415,12 +424,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
       generation++;
     }
 
-    ChecksumIndexOutput segnOutput = null;
+    IndexOutput segnOutput = null;
     boolean success = false;
 
     try {
-      segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName, IOContext.DEFAULT));
-      CodecUtil.writeHeader(segnOutput, "segments", VERSION_46);
+      segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
+      CodecUtil.writeHeader(segnOutput, "segments", VERSION_48);
       segnOutput.writeLong(version);
       segnOutput.writeInt(counter); // write counter
       segnOutput.writeInt(size()); // write infos
@@ -641,9 +650,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
         // a stale cache (NFS) we have a better chance of
         // getting the right generation.
         long genB = -1;
-        IndexInput genInput = null;
+        ChecksumIndexInput genInput = null;
         try {
-          genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
+          genInput = directory.openChecksumInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
         } catch (IOException e) {
           if (infoStream != null) {
             message("segments.gen open: IOException " + e);
@@ -653,18 +662,23 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
         if (genInput != null) {
           try {
             int version = genInput.readInt();
-            if (version == FORMAT_SEGMENTS_GEN_CURRENT) {
+            if (version == FORMAT_SEGMENTS_GEN_47 || version == FORMAT_SEGMENTS_GEN_CHECKSUM) {
               long gen0 = genInput.readLong();
               long gen1 = genInput.readLong();
               if (infoStream != null) {
                 message("fallback check: " + gen0 + "; " + gen1);
               }
+              if (version == FORMAT_SEGMENTS_GEN_CHECKSUM) {
+                CodecUtil.checkFooter(genInput);
+              } else {
+                CodecUtil.checkEOF(genInput);
+              }
               if (gen0 == gen1) {
                 // The file is consistent.
                 genB = gen0;
               }
             } else {
-              throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT);
+              throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_START, FORMAT_SEGMENTS_GEN_CURRENT);
             }
           } catch (IOException err2) {
             // rethrow any format exception
@@ -863,7 +877,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
     }
     boolean success = false;
     try {
-      pendingSegnOutput.finishCommit();
+      CodecUtil.writeFooter(pendingSegnOutput);
       success = true;
     } finally {
       if (!success) {

@@ -52,7 +52,13 @@ final class SegmentMerger {
 
   // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
   SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
-                MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
+                MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException {
+    // validate incoming readers
+    if (validate) {
+      for (AtomicReader reader : readers) {
+        reader.checkIntegrity();
+      }
+    }
     mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     directory = dir;
     this.codec = segmentInfo.getCodec();

@@ -33,10 +33,13 @@ import org.apache.lucene.util.IOUtils;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 
 /**
  * IndexReader implementation over a single segment.
@@ -72,7 +75,8 @@ public final class SegmentReader extends AtomicReader {
     }
   };
 
-  final Map<String,DocValuesProducer> dvProducers = new HashMap<>();
+  final Map<String,DocValuesProducer> dvProducersByField = new HashMap<>();
+  final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
 
   final FieldInfos fieldInfos;
 
@@ -177,12 +181,15 @@ public final class SegmentReader extends AtomicReader {
 
     // System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gens=" + genInfos.keySet());
 
+    // TODO: can we avoid iterating over fieldinfos several times and creating maps of all this stuff if dv updates do not exist?
+
     for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
       Long gen = e.getKey();
       List<FieldInfo> infos = e.getValue();
       DocValuesProducer dvp = segDocValues.getDocValuesProducer(gen, si, IOContext.READ, dir, dvFormat, infos);
       for (FieldInfo fi : infos) {
-        dvProducers.put(fi.name, dvp);
+        dvProducersByField.put(fi.name, dvp);
+        dvProducers.add(dvp);
       }
     }
 
@@ -250,7 +257,7 @@ public final class SegmentReader extends AtomicReader {
     try {
       core.decRef();
     } finally {
-      dvProducers.clear();
+      dvProducersByField.clear();
       try {
         IOUtils.close(docValuesLocal, docsWithFieldLocal);
       } finally {
@@ -395,13 +402,12 @@ public final class SegmentReader extends AtomicReader {
       return null;
     }
 
-    DocValuesProducer dvProducer = dvProducers.get(field);
-    assert dvProducer != null;
-
     Map<String,Object> dvFields = docValuesLocal.get();
 
     NumericDocValues dvs = (NumericDocValues) dvFields.get(field);
     if (dvs == null) {
+      DocValuesProducer dvProducer = dvProducersByField.get(field);
+      assert dvProducer != null;
       dvs = dvProducer.getNumeric(fi);
       dvFields.put(field, dvs);
     }
@@ -422,13 +428,12 @@ public final class SegmentReader extends AtomicReader {
       return null;
     }
 
-    DocValuesProducer dvProducer = dvProducers.get(field);
-    assert dvProducer != null;
-
     Map<String,Bits> dvFields = docsWithFieldLocal.get();
 
     Bits dvs = dvFields.get(field);
     if (dvs == null) {
+      DocValuesProducer dvProducer = dvProducersByField.get(field);
+      assert dvProducer != null;
      dvs = dvProducer.getDocsWithField(fi);
       dvFields.put(field, dvs);
     }
@@ -444,13 +449,12 @@ public final class SegmentReader extends AtomicReader {
      return null;
    }
 
-    DocValuesProducer dvProducer = dvProducers.get(field);
-    assert dvProducer != null;
-
     Map<String,Object> dvFields = docValuesLocal.get();
 
     BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field);
     if (dvs == null) {
+      DocValuesProducer dvProducer = dvProducersByField.get(field);
+      assert dvProducer != null;
       dvs = dvProducer.getBinary(fi);
       dvFields.put(field, dvs);
     }
@@ -466,13 +470,12 @@ public final class SegmentReader extends AtomicReader {
       return null;
     }
 
-    DocValuesProducer dvProducer = dvProducers.get(field);
-    assert dvProducer != null;
-
     Map<String,Object> dvFields = docValuesLocal.get();
 
     SortedDocValues dvs = (SortedDocValues) dvFields.get(field);
     if (dvs == null) {
+      DocValuesProducer dvProducer = dvProducersByField.get(field);
+      assert dvProducer != null;
       dvs = dvProducer.getSorted(fi);
       dvFields.put(field, dvs);
     }
@@ -488,13 +491,12 @@ public final class SegmentReader extends AtomicReader {
       return null;
     }
 
-    DocValuesProducer dvProducer = dvProducers.get(field);
-    assert dvProducer != null;
-
     Map<String,Object> dvFields = docValuesLocal.get();
 
     SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field);
     if (dvs == null) {
+      DocValuesProducer dvProducer = dvProducersByField.get(field);
+      assert dvProducer != null;
       dvs = dvProducer.getSortedSet(fi);
       dvFields.put(field, dvs);
     }
@@ -548,12 +550,45 @@ public final class SegmentReader extends AtomicReader {
   public long ramBytesUsed() {
     ensureOpen();
     long ramBytesUsed = 0;
-    if (segDocValues != null) {
-      ramBytesUsed += segDocValues.ramBytesUsed();
+    if (dvProducers != null) {
+      for (DocValuesProducer producer : dvProducers) {
+        ramBytesUsed += producer.ramBytesUsed();
+      }
     }
     if (core != null) {
       ramBytesUsed += core.ramBytesUsed();
     }
     return ramBytesUsed;
   }
 
+  @Override
+  public void checkIntegrity() throws IOException {
+    ensureOpen();
+
+    // stored fields
+    getFieldsReader().checkIntegrity();
+
+    // term vectors
+    TermVectorsReader termVectorsReader = getTermVectorsReader();
+    if (termVectorsReader != null) {
+      termVectorsReader.checkIntegrity();
+    }
+
+    // terms/postings
+    if (core.fields != null) {
+      core.fields.checkIntegrity();
+    }
+
+    // norms
+    if (core.normsProducer != null) {
+      core.normsProducer.checkIntegrity();
+    }
+
+    // docvalues
+    if (dvProducers != null) {
+      for (DocValuesProducer producer : dvProducers) {
+        producer.checkIntegrity();
+      }
+    }
+  }
 }

@@ -239,4 +239,12 @@ public final class SlowCompositeReaderWrapper extends AtomicReader {
     // TODO: as this is a wrapper, should we really close the delegate?
     in.close();
   }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    ensureOpen();
+    for (AtomicReaderContext ctx : in.leaves()) {
+      ctx.reader().checkIntegrity();
+    }
+  }
 }

@@ -0,0 +1,84 @@
package org.apache.lucene.store;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.zip.Checksum;

/**
 * Wraps another {@link Checksum} with an internal buffer
 * to speed up checksum calculations.
 */
public class BufferedChecksum implements Checksum {
  private final Checksum in;
  private final byte[] buffer;
  private int upto;
  /** Default buffer size: 256 */
  public static final int DEFAULT_BUFFERSIZE = 256;

  /** Create a new BufferedChecksum with {@link #DEFAULT_BUFFERSIZE} */
  public BufferedChecksum(Checksum in) {
    this(in, DEFAULT_BUFFERSIZE);
  }

  /** Create a new BufferedChecksum with the specified bufferSize */
  public BufferedChecksum(Checksum in, int bufferSize) {
    this.in = in;
    this.buffer = new byte[bufferSize];
  }

  @Override
  public void update(int b) {
    if (upto == buffer.length) {
      flush();
    }
    buffer[upto++] = (byte) b;
  }

  @Override
  public void update(byte[] b, int off, int len) {
    if (len >= buffer.length) {
      flush();
      in.update(b, off, len);
    } else {
      if (upto + len > buffer.length) {
        flush();
      }
      System.arraycopy(b, off, buffer, upto, len);
      upto += len;
    }
  }

  @Override
  public long getValue() {
    flush();
    return in.getValue();
  }

  @Override
  public void reset() {
    upto = 0;
    in.reset();
  }

  private void flush() {
    if (upto > 0) {
      in.update(buffer, 0, upto);
    }
    upto = 0;
  }
}

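Since BufferedChecksum implements java.util.zip.Checksum, it is a drop-in wrapper; the buffer presumably amortizes the cost of many tiny update calls by forwarding them to the wrapped CRC32 in bulk. A quick sketch of that equivalence (demo class name is made up; TestBufferedChecksum further down exercises the same contract randomly):

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.lucene.store.BufferedChecksum;

public class BufferedChecksumDemo {
  public static void main(String[] args) {
    Checksum plain = new CRC32();
    Checksum buffered = new BufferedChecksum(new CRC32());

    byte[] data = "hello checksum".getBytes(StandardCharsets.UTF_8);
    for (byte b : data) {
      plain.update(b);    // one call into CRC32 per byte
      buffered.update(b); // buffered internally, flushed in bulk on getValue()
    }
    // Same CRC either way; the buffer only batches the updates.
    System.out.println(plain.getValue() == buffered.getValue()); // true
  }
}
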
@@ -21,41 +21,40 @@ import java.io.IOException;
 import java.util.zip.CRC32;
 import java.util.zip.Checksum;

-/** Writes bytes through to a primary IndexOutput, computing
- * checksum.
- *
- * @lucene.internal
+/**
+ * Simple implementation of {@link ChecksumIndexInput} that wraps
+ * another input and delegates calls.
  */
-public class ChecksumIndexOutput extends IndexOutput {
-  IndexOutput main;
-  Checksum digest;
+public class BufferedChecksumIndexInput extends ChecksumIndexInput {
+  final IndexInput main;
+  final Checksum digest;

-  public ChecksumIndexOutput(IndexOutput main) {
+  /** Creates a new BufferedChecksumIndexInput */
+  public BufferedChecksumIndexInput(IndexInput main) {
+    super("BufferedChecksumIndexInput(" + main + ")");
     this.main = main;
-    digest = new CRC32();
+    this.digest = new BufferedChecksum(new CRC32());
   }

   @Override
-  public void writeByte(byte b) throws IOException {
+  public byte readByte() throws IOException {
+    final byte b = main.readByte();
     digest.update(b);
-    main.writeByte(b);
+    return b;
   }

   @Override
-  public void writeBytes(byte[] b, int offset, int length) throws IOException {
-    digest.update(b, offset, length);
-    main.writeBytes(b, offset, length);
+  public void readBytes(byte[] b, int offset, int len)
+    throws IOException {
+    main.readBytes(b, offset, len);
+    digest.update(b, offset, len);
   }

   @Override
   public long getChecksum() {
     return digest.getValue();
   }

-  @Override
-  public void flush() throws IOException {
-    main.flush();
-  }
-
   @Override
   public void close() throws IOException {
     main.close();

@@ -66,13 +65,8 @@ public class ChecksumIndexOutput extends IndexOutput {
     return main.getFilePointer();
   }

-  /** writes the checksum */
-  public void finishCommit() throws IOException {
-    main.writeLong(getChecksum());
-  }
-
   @Override
-  public long length() throws IOException {
+  public long length() {
     return main.length();
   }
 }

@@ -18,6 +18,7 @@ package org.apache.lucene.store;
 */

import java.io.IOException;
import java.util.zip.CRC32;

/** Base implementation class for buffered {@link IndexOutput}. */
public abstract class BufferedIndexOutput extends IndexOutput {

@@ -28,6 +29,7 @@ public abstract class BufferedIndexOutput extends IndexOutput {
  private final byte[] buffer;
  private long bufferStart = 0; // position in file of buffer
  private int bufferPosition = 0; // position in buffer
  private final CRC32 crc = new CRC32();

  /**
   * Creates a new {@link BufferedIndexOutput} with the default buffer size

@@ -75,6 +77,7 @@ public abstract class BufferedIndexOutput extends IndexOutput {
      if (bufferPosition > 0)
        flush();
      // and write data at once
      crc.update(b, offset, length);
      flushBuffer(b, offset, length);
      bufferStart += length;
    } else {

@@ -99,6 +102,7 @@ public abstract class BufferedIndexOutput extends IndexOutput {

  @Override
  public void flush() throws IOException {
    crc.update(buffer, 0, bufferPosition);
    flushBuffer(buffer, bufferPosition);
    bufferStart += bufferPosition;
    bufferPosition = 0;

@@ -141,4 +145,9 @@ public abstract class BufferedIndexOutput extends IndexOutput {
    return bufferSize;
  }

  @Override
  public long getChecksum() throws IOException {
    flush();
    return crc.getValue();
  }
}

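Because the CRC is maintained incrementally as bytes pass through the output, appending the footer at close time is cheap. A sketch of the full write/read round trip these primitives enable; the directory path, file name, and "DemoCodec" string are all made up for illustration:

import java.io.File;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

public class FooterRoundTrip {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/tmp/footer-demo")); // hypothetical
    IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT);
    try {
      CodecUtil.writeHeader(out, "DemoCodec", 0); // hypothetical codec name
      out.writeVInt(42);                          // payload
      CodecUtil.writeFooter(out);                 // footer carrying the running CRC
    } finally {
      out.close();
    }

    IndexInput in = dir.openInput("demo.bin", IOContext.READONCE);
    try {
      CodecUtil.checksumEntireFile(in); // on-demand, whole-file verification
    } finally {
      in.close();
    }
    dir.close();
  }
}
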
@@ -1,5 +1,7 @@
 package org.apache.lucene.store;

+import java.io.IOException;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with

@@ -17,61 +19,24 @@ package org.apache.lucene.store;
  * limitations under the License.
  */

-import java.io.IOException;
-import java.util.zip.CRC32;
-import java.util.zip.Checksum;
-
-/** Reads bytes through to a primary IndexInput, computing
- * checksum as it goes. Note that you cannot use seek().
- *
- * @lucene.internal
+/**
+ * Extension of IndexInput, computing checksum as it goes.
+ * Callers can retrieve the checksum via {@link #getChecksum()}.
  */
-public class ChecksumIndexInput extends IndexInput {
-  IndexInput main;
-  Checksum digest;
-
-  public ChecksumIndexInput(IndexInput main) {
-    super("ChecksumIndexInput(" + main + ")");
-    this.main = main;
-    digest = new CRC32();
-  }
-
-  @Override
-  public byte readByte() throws IOException {
-    final byte b = main.readByte();
-    digest.update(b);
-    return b;
-  }
-
-  @Override
-  public void readBytes(byte[] b, int offset, int len)
-    throws IOException {
-    main.readBytes(b, offset, len);
-    digest.update(b, offset, len);
-  }
-
-  public long getChecksum() {
-    return digest.getValue();
-  }
-
-  @Override
-  public void close() throws IOException {
-    main.close();
-  }
-
-  @Override
-  public long getFilePointer() {
-    return main.getFilePointer();
-  }
+public abstract class ChecksumIndexInput extends IndexInput {
+
+  /** resourceDescription should be a non-null, opaque string
+   * describing this resource; it's returned from
+   * {@link #toString}. */
+  protected ChecksumIndexInput(String resourceDescription) {
+    super(resourceDescription);
+  }
+
+  /** Returns the current checksum value */
+  public abstract long getChecksum() throws IOException;

   @Override
   public void seek(long pos) {
     throw new UnsupportedOperationException();
   }
-
-  @Override
-  public long length() {
-    return main.length();
-  }
 }

@@ -52,14 +52,15 @@ import java.io.IOException;
  * </ul>
  * <p>Description:</p>
  * <ul>
- * <li>Compound (.cfs) --> Header, FileData <sup>FileCount</sup></li>
+ * <li>Compound (.cfs) --> Header, FileData <sup>FileCount</sup>, Footer</li>
  * <li>Compound Entry Table (.cfe) --> Header, FileCount, <FileName,
  *     DataOffset, DataLength> <sup>FileCount</sup></li>
  * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>FileCount --> {@link DataOutput#writeVInt VInt}</li>
- * <li>DataOffset,DataLength --> {@link DataOutput#writeLong UInt64}</li>
+ * <li>DataOffset,DataLength,Checksum --> {@link DataOutput#writeLong UInt64}</li>
  * <li>FileName --> {@link DataOutput#writeString String}</li>
  * <li>FileData --> raw file data</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
  * </ul>
  * <p>Notes:</p>
  * <ul>

@@ -87,6 +88,7 @@ public final class CompoundFileDirectory extends BaseDirectory {
  private static final Map<String,FileEntry> SENTINEL = Collections.emptyMap();
  private final CompoundFileWriter writer;
  private final IndexInputSlicer handle;
  private int version;

  /**
   * Create a new CompoundFileDirectory.

@@ -120,15 +122,15 @@ public final class CompoundFileDirectory extends BaseDirectory {
   }

   /** Helper method that reads CFS entries from an input stream */
-  private static final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException {
+  private final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException {
     IOException priorE = null;
-    IndexInput entriesStream = null;
+    ChecksumIndexInput entriesStream = null;
     try {
       final String entriesFileName = IndexFileNames.segmentFileName(
                                             IndexFileNames.stripExtension(name), "",
                                             IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
-      entriesStream = dir.openInput(entriesFileName, IOContext.READONCE);
-      CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_START);
+      entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE);
+      version = CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
       final int numEntries = entriesStream.readVInt();
       final Map<String, FileEntry> mapping = new HashMap<>(numEntries);
       for (int i = 0; i < numEntries; i++) {

@@ -141,8 +143,10 @@ public final class CompoundFileDirectory extends BaseDirectory {
         fileEntry.offset = entriesStream.readLong();
         fileEntry.length = entriesStream.readLong();
       }
-      if (entriesStream.getFilePointer() != entriesStream.length()) {
-        throw new CorruptIndexException("did not read all bytes from file \"" + entriesFileName + "\": read " + entriesStream.getFilePointer() + " vs size " + entriesStream.length() + " (resource: " + entriesStream + ")");
+      if (version >= CompoundFileWriter.VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(entriesStream);
+      } else {
+        CodecUtil.checkEOF(entriesStream);
       }
       return mapping;
     } catch (IOException ioe) {

@@ -54,7 +54,8 @@ final class CompoundFileWriter implements Closeable{
   // versioning for the .cfs file
   static final String DATA_CODEC = "CompoundFileWriterData";
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_CHECKSUM = 1;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;

   // versioning for the .cfe file
   static final String ENTRY_CODEC = "CompoundFileWriterEntries";

@@ -140,6 +141,7 @@ final class CompoundFileWriter implements Closeable{
       // open the compound stream
       getOutput();
       assert dataOut != null;
+      CodecUtil.writeFooter(dataOut);
     } catch (IOException e) {
       priorException = e;
     } finally {

@@ -202,6 +204,7 @@ final class CompoundFileWriter implements Closeable{
       entryOut.writeLong(fe.offset);
       entryOut.writeLong(fe.length);
     }
+    CodecUtil.writeFooter(entryOut);
   }

   IndexOutput createOutput(String name, IOContext context) throws IOException {

@@ -342,6 +345,11 @@ final class CompoundFileWriter implements Closeable{
       writtenBytes += length;
       delegate.writeBytes(b, offset, length);
     }
+
+    @Override
+    public long getChecksum() throws IOException {
+      return delegate.getChecksum();
+    }
   }
 }

@@ -100,7 +100,12 @@ public abstract class Directory implements Closeable {
   * <p>Throws {@link FileNotFoundException} or {@link NoSuchFileException}
   * if the file does not exist.
   */
  public abstract IndexInput openInput(String name, IOContext context) throws IOException;

  /** Returns a stream reading an existing file, computing checksum as it reads */
  public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException {
    return new BufferedChecksumIndexInput(openInput(name, context));
  }

  /** Construct a {@link Lock}.
   * @param name the name of the lock file

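The read-side counterpart of the writer sketch earlier, mirroring the readEntries pattern above: open with a checksumming input, read, then validate the footer. File name, path, and "DemoCodec" are the hypothetical ones from the writer sketch:

import java.io.File;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;

public class ReadWithChecksum {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/tmp/footer-demo")); // hypothetical
    ChecksumIndexInput in = dir.openChecksumInput("demo.bin", IOContext.READONCE);
    try {
      CodecUtil.checkHeader(in, "DemoCodec", 0, 0);
      int value = in.readVInt();  // every read feeds the running CRC
      CodecUtil.checkFooter(in);  // compares it to the checksum stored in the footer
      System.out.println("value=" + value);
    } finally {
      in.close();
    }
    dir.close();
  }
}
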
@@ -43,6 +43,8 @@ public abstract class IndexOutput extends DataOutput implements Closeable {
   */
  public abstract long getFilePointer();

  /** Returns the current checksum of bytes written so far */
  public abstract long getChecksum() throws IOException;

  /** The number of bytes in the file. */
  public abstract long length() throws IOException;

@@ -18,6 +18,8 @@ package org.apache.lucene.store;
 */

import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

/**
 * A memory-resident {@link IndexOutput} implementation.

@@ -35,6 +37,8 @@ public class RAMOutputStream extends IndexOutput {
  private int bufferPosition;
  private long bufferStart;
  private int bufferLength;

  private Checksum crc = new BufferedChecksum(new CRC32());

  /** Construct an empty output buffer. */
  public RAMOutputStream() {

@@ -95,6 +99,7 @@ public class RAMOutputStream extends IndexOutput {
    bufferStart = 0;
    bufferLength = 0;
    file.setLength(0);
    crc.reset();
  }

  @Override

@@ -113,12 +118,14 @@ public class RAMOutputStream extends IndexOutput {
      currentBufferIndex++;
      switchCurrentBuffer();
    }
    crc.update(b);
    currentBuffer[bufferPosition++] = b;
  }

  @Override
  public void writeBytes(byte[] b, int offset, int len) throws IOException {
    assert b != null;
    crc.update(b, offset, len);
    while (len > 0) {
      if (bufferPosition == bufferLength) {
        currentBufferIndex++;

@@ -165,5 +172,10 @@ public class RAMOutputStream extends IndexOutput {
  /** Returns byte usage of all buffers. */
  public long sizeInBytes() {
    return (long) file.numBuffers() * (long) BUFFER_SIZE;
  }

  @Override
  public long getChecksum() throws IOException {
    return crc.getValue();
  }
}

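A small sketch (demo class name is made up) confirming that the output's checksum is just a CRC32 over the bytes written, maintained incrementally rather than recomputed at the end:

import java.util.Random;
import java.util.zip.CRC32;

import org.apache.lucene.store.RAMOutputStream;

public class RamChecksumDemo {
  public static void main(String[] args) throws Exception {
    byte[] data = new byte[1024];
    new Random(42).nextBytes(data);

    RAMOutputStream out = new RAMOutputStream();
    out.writeBytes(data, 0, data.length);
    long crcFromOutput = out.getChecksum(); // tracked on every write
    out.close();

    CRC32 expected = new CRC32();
    expected.update(data, 0, data.length);
    System.out.println(crcFromOutput == expected.getValue()); // true
  }
}
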
@@ -0,0 +1,90 @@
package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Test that a plain default puts CRC32 footers in all files.
 */
public class TestAllFilesHaveChecksumFooter extends LuceneTestCase {
  public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setCodec(new Lucene46Codec());
    RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
    Document doc = new Document();
    // these fields should sometimes get term vectors, etc
    Field idField = newStringField("id", "", Field.Store.NO);
    Field bodyField = newTextField("body", "", Field.Store.NO);
    Field dvField = new NumericDocValuesField("dv", 5);
    doc.add(idField);
    doc.add(bodyField);
    doc.add(dvField);
    for (int i = 0; i < 100; i++) {
      idField.setStringValue(Integer.toString(i));
      bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
      riw.addDocument(doc);
      if (random().nextInt(7) == 0) {
        riw.commit();
      }
      if (random().nextInt(20) == 0) {
        riw.deleteDocuments(new Term("id", Integer.toString(i)));
      }
    }
    riw.close();
    checkHeaders(dir);
    dir.close();
  }

  private void checkHeaders(Directory dir) throws IOException {
    for (String file : dir.listAll()) {
      if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) {
        CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false);
        checkHeaders(cfsDir); // recurse into cfs
        cfsDir.close();
      }
      IndexInput in = null;
      boolean success = false;
      try {
        in = dir.openInput(file, newIOContext(random()));
        CodecUtil.checksumEntireFile(in);
        success = true;
      } finally {
        if (success) {
          IOUtils.close(in);
        } else {
          IOUtils.closeWhileHandlingException(in);
        }
      }
    }
  }
}

@@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;

@@ -39,14 +40,15 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setCodec(new Lucene46Codec());
    // riw should sometimes create docvalues fields, etc
    RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
    Document doc = new Document();
    // these fields should sometimes get term vectors, etc
    Field idField = newStringField("id", "", Field.Store.NO);
    Field bodyField = newTextField("body", "", Field.Store.NO);
    Field dvField = new NumericDocValuesField("dv", 5);
    doc.add(idField);
    doc.add(bodyField);
    doc.add(dvField);
    for (int i = 0; i < 100; i++) {
      idField.setStringValue(Integer.toString(i));
      bodyField.setStringValue(TestUtil.randomUnicodeString(random()));

@@ -54,6 +56,10 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
      if (random().nextInt(7) == 0) {
        riw.commit();
      }
      // TODO: we should make a new format with a clean header...
      // if (random().nextInt(20) == 0) {
      //   riw.deleteDocuments(new Term("id", Integer.toString(i)));
      // }
    }
    riw.close();
    checkHeaders(dir);

@@ -222,7 +222,7 @@ public class TestDoc extends LuceneTestCase {

    SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
                                             si, InfoStream.getDefault(), trackingDir,
-                                            MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
+                                            MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true);

    MergeState mergeState = merger.merge();
    r1.close();

@@ -83,7 +83,7 @@ public class TestSegmentMerger extends LuceneTestCase {

    SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(reader1, reader2),
                                             si, InfoStream.getDefault(), mergedDir,
-                                            MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()));
+                                            MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()), true);
    MergeState mergeState = merger.merge();
    int docsMerged = mergeState.segmentInfo.getDocCount();
    assertTrue(docsMerged == 2);

@@ -0,0 +1,68 @@
package org.apache.lucene.store;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.lucene.util.LuceneTestCase;

public class TestBufferedChecksum extends LuceneTestCase {

  public void testSimple() {
    Checksum c = new BufferedChecksum(new CRC32());
    c.update(1);
    c.update(2);
    c.update(3);
    assertEquals(1438416925L, c.getValue());
  }

  public void testRandom() {
    Checksum c1 = new CRC32();
    Checksum c2 = new BufferedChecksum(new CRC32());
    int iterations = atLeast(10000);
    for (int i = 0; i < iterations; i++) {
      switch(random().nextInt(4)) {
        case 0:
          // update(byte[], int, int)
          int length = random().nextInt(1024);
          byte[] bytes = new byte[length];
          random().nextBytes(bytes);
          c1.update(bytes, 0, bytes.length);
          c2.update(bytes, 0, bytes.length);
          break;
        case 1:
          // update(int)
          int b = random().nextInt(256);
          c1.update(b);
          c2.update(b);
          break;
        case 2:
          // reset()
          c1.reset();
          c2.reset();
          break;
        case 3:
          // getValue()
          assertEquals(c1.getValue(), c2.getValue());
          break;
      }
    }
    assertEquals(c1.getValue(), c2.getValue());
  }
}