LUCENE-2446: add checksums to index files

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1583550 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-04-01 07:36:45 +00:00
parent aacd7ee80f
commit c189d0fb74
118 changed files with 1560 additions and 535 deletions

View File

@ -138,6 +138,13 @@ New Features
* LUCENE-5558: Add TruncateTokenFilter which truncates terms to
the specified length. (Ahmet Arslan via Robert Muir)
* LUCENE-2446: Added checksums to lucene index files. As of 4.8, the last 8
bytes of each file contain a zlib-crc32 checksum. Small metadata files are
verified on load. Larger files can be checked on demand via
AtomicReader.checkIntegrity. You can configure this to happen automatically
before merges by enabling IndexWriterConfig.setCheckIntegrityAtMerge.
(Robert Muir)
API Changes
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
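
For context, a minimal sketch (not part of the commit itself) of how the verification hooks described in the LUCENE-2446 entry above might be used against the 4.8 API. The wrapping class, directory path, and analyzer choice are illustrative assumptions, and setCheckIntegrityAtMerge is assumed to take a boolean:

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class CheckIntegrityExample {                        // hypothetical wrapper class
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File(args[0]));   // index path is an assumption

    // On-demand check: walk each segment (leaf) reader and verify the
    // zlib-crc32 footers of its larger files; a mismatch throws CorruptIndexException.
    DirectoryReader reader = DirectoryReader.open(dir);
    try {
      for (AtomicReaderContext ctx : reader.leaves()) {
        ctx.reader().checkIntegrity();
      }
    } finally {
      reader.close();
    }

    // Automatic check of incoming segments before merges (off by default):
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48,
        new StandardAnalyzer(Version.LUCENE_48));
    iwc.setCheckIntegrityAtMerge(true);
    // pass iwc to new IndexWriter(dir, iwc) as usual
  }
}

Small metadata files need no explicit call; per the entry above they are verified when the segment is opened.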

View File

@ -177,7 +177,10 @@ public class BlockTermsReader extends FieldsProducer {
}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
} else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
@ -863,4 +866,14 @@ public class BlockTermsReader extends FieldsProducer {
sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0;
return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {
// verify terms
if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(in);
}
// verify postings
postingsReader.checkIntegrity();
}
}

View File

@ -63,12 +63,13 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
public static final int VERSION_START = 0;
public static final int VERSION_APPEND_ONLY = 1;
public static final int VERSION_META_ARRAY = 2;
public static final int VERSION_CURRENT = VERSION_META_ARRAY;
public static final int VERSION_CHECKSUM = 3;
public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of terms file */
static final String TERMS_EXTENSION = "tib";
protected final IndexOutput out;
protected IndexOutput out;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
FieldInfo currentField;
@ -176,6 +177,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
}
public void close() throws IOException {
if (out != null) {
try {
final long dirStart = out.getFilePointer();
@ -194,8 +196,11 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
}
}
writeTrailer(dirStart);
CodecUtil.writeFooter(out);
} finally {
IOUtils.close(out, postingsWriter, termsIndexWriter);
out = null;
}
}
}

View File

@ -66,6 +66,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// start of the field info data
private long dirOffset;
private int version;
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
throws IOException {
@ -78,6 +80,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
try {
readHeader(in);
if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(in);
}
indexInterval = in.readVInt();
if (indexInterval < 1) {
throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
@ -124,7 +131,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
}
private void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT);
}
@ -273,7 +280,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
public void close() throws IOException {}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
input.seek(input.length() - CodecUtil.footerLength() - 8);
} else {
input.seek(input.length() - 8);
}
dirOffset = input.readLong();
input.seek(dirOffset);
}

View File

@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
@ -43,7 +42,7 @@ import java.io.IOException;
*
* @lucene.experimental */
public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
protected final IndexOutput out;
protected IndexOutput out;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tii";
@ -52,7 +51,8 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
final static int VERSION_START = 0;
final static int VERSION_APPEND_ONLY = 1;
final static int VERSION_MONOTONIC_ADDRESSING = 2;
final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING;
final static int VERSION_CHECKSUM = 3;
final static int VERSION_CURRENT = VERSION_CHECKSUM;
final static int BLOCKSIZE = 4096;
final private int termIndexInterval;
@ -207,6 +207,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
@Override
public void close() throws IOException {
if (out != null) {
boolean success = false;
try {
final long dirStart = out.getFilePointer();
@ -233,6 +234,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
}
}
writeTrailer(dirStart);
CodecUtil.writeFooter(out);
success = true;
} finally {
if (success) {
@ -240,6 +242,8 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
} else {
IOUtils.closeWhileHandlingException(out);
}
out = null;
}
}
}

View File

@ -63,6 +63,10 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
version = readHeader(in);
if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(in);
}
seekDir(in, dirOffset);
// Read directory
@ -190,7 +194,10 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
public void close() throws IOException {}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
} else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}

View File

@ -45,7 +45,7 @@ import org.apache.lucene.util.fst.Util;
*
* @lucene.experimental */
public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
protected final IndexOutput out;
protected IndexOutput out;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tiv";
@ -53,7 +53,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX";
final static int VERSION_START = 0;
final static int VERSION_APPEND_ONLY = 1;
final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
final static int VERSION_CHECKSUM = 2;
final static int VERSION_CURRENT = VERSION_CHECKSUM;
private final List<FSTFieldWriter> fields = new ArrayList<>();
@ -290,6 +291,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
@Override
public void close() throws IOException {
if (out != null) {
try {
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
@ -311,8 +313,11 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
}
}
writeTrailer(dirStart);
CodecUtil.writeFooter(out);
} finally {
out.close();
out = null;
}
}
}

View File

@ -39,8 +39,8 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -66,7 +66,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
* </p>
* <ul>
* <li>BloomFilter (.blm) --&gt; Header, DelegatePostingsFormatName,
* NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
* NumFilteredFields, Filter<sup>NumFilteredFields</sup>, Footer</li>
* <li>Filter --&gt; FieldNumber, FuzzySet</li>
* <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput)}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
@ -75,13 +75,16 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
* <li>NumFilteredFields --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt Uint32} The number of the
* field in this segment</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* @lucene.experimental
*/
public final class BloomFilteringPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "BloomFilter";
public static final int BLOOM_CODEC_VERSION = 1;
public static final int VERSION_START = 1;
public static final int VERSION_CHECKSUM = 2;
public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of Bloom Filters file */
static final String BLOOM_EXTENSION = "blm";
@ -157,12 +160,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexInput bloomIn = null;
ChecksumIndexInput bloomIn = null;
boolean success = false;
try {
bloomIn = state.directory.openInput(bloomFileName, state.context);
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
BLOOM_CODEC_VERSION);
bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);
// // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format
@ -178,6 +180,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
bloomsByFieldName.put(fieldInfo.name, bloom);
}
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(bloomIn);
} else {
CodecUtil.checkEOF(bloomIn);
}
IOUtils.close(bloomIn);
success = true;
} finally {
@ -390,6 +397,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
}
return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {
delegateFieldsProducer.checkIntegrity();
}
}
class BloomFilteredFieldsConsumer extends FieldsConsumer {
@ -466,10 +478,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexOutput bloomOutput = null;
try {
bloomOutput = state.directory
.createOutput(bloomFileName, state.context);
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
BLOOM_CODEC_VERSION);
bloomOutput = state.directory.createOutput(bloomFileName, state.context);
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT);
// remember the name of the postings format we will delegate to
bloomOutput.writeString(delegatePostingsFormat.getName());
@ -481,6 +491,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
bloomOutput.writeInt(fieldInfo.number);
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
}
CodecUtil.writeFooter(bloomOutput);
} finally {
IOUtils.close(bloomOutput);
}

View File

@ -40,7 +40,7 @@ import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
*/
class DirectDocValuesConsumer extends DocValuesConsumer {
final IndexOutput data, meta;
IndexOutput data, meta;
final int maxDoc;
DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
@ -142,6 +142,10 @@ class DirectDocValuesConsumer extends DocValuesConsumer {
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data);
}
success = true;
} finally {
@ -150,6 +154,7 @@ class DirectDocValuesConsumer extends DocValuesConsumer {
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
data = meta = null;
}
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -65,6 +66,7 @@ class DirectDocValuesProducer extends DocValuesProducer {
private final int maxDoc;
private final AtomicLong ramBytesUsed;
private final int version;
static final byte NUMBER = 0;
static final byte BYTES = 1;
@ -72,22 +74,27 @@ class DirectDocValuesProducer extends DocValuesProducer {
static final byte SORTED_SET = 3;
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int VERSION_CHECKSUM = 1;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
boolean success = false;
final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
VERSION_CURRENT);
readFields(in);
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(in);
} else {
CodecUtil.checkEOF(in);
}
success = true;
} finally {
if (success) {
@ -185,6 +192,13 @@ class DirectDocValuesProducer extends DocValuesProducer {
return ramBytesUsed.get();
}
@Override
public void checkIntegrity() throws IOException {
if (version >= VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(data);
}
}
@Override
public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericDocValues instance = numericInstances.get(field.number);

View File

@ -109,6 +109,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
if (state.context.context != IOContext.Context.MERGE) {
FieldsProducer loadedPostings;
try {
postings.checkIntegrity();
loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff);
} finally {
postings.close();
@ -157,6 +158,12 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {
// if we read entirely into ram, we already validated.
// otherwise returned the raw postings reader
}
}
private final static class DirectField extends Terms {

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@ -56,14 +57,13 @@ import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader;
/**
* FST-based terms dictionary reader.
*
* The FST index maps each term and its ord, and during seek
* the ord is used fetch metadata from a single block.
* The term dictionary is fully memeory resident.
* The term dictionary is fully memory resident.
*
* @lucene.experimental
*/
@ -71,8 +71,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
IndexInput indexIn = null;
IndexInput blockIn = null;
int version;
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
@ -80,11 +79,18 @@ public class FSTOrdTermsReader extends FieldsProducer {
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
this.postingsReader = postingsReader;
ChecksumIndexInput indexIn = null;
IndexInput blockIn = null;
boolean success = false;
try {
this.indexIn = state.directory.openInput(termsIndexFileName, state.context);
this.blockIn = state.directory.openInput(termsBlockFileName, state.context);
readHeader(indexIn);
indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context);
blockIn = state.directory.openInput(termsBlockFileName, state.context);
version = readHeader(indexIn);
readHeader(blockIn);
if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(blockIn);
}
this.postingsReader.init(blockIn);
seekDir(blockIn);
@ -100,14 +106,24 @@ public class FSTOrdTermsReader extends FieldsProducer {
int longsSize = blockIn.readVInt();
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
TermsReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, current, previous);
checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous);
}
if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
CodecUtil.checkFooter(indexIn);
} else {
CodecUtil.checkEOF(indexIn);
}
success = true;
} finally {
if (success) {
IOUtils.close(indexIn, blockIn);
} else {
IOUtils.closeWhileHandlingException(indexIn, blockIn);
}
}
}
private int readHeader(IndexInput in) throws IOException {
return CodecUtil.checkHeader(in, FSTOrdTermsWriter.TERMS_CODEC_NAME,
@ -115,10 +131,14 @@ public class FSTOrdTermsReader extends FieldsProducer {
FSTOrdTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
in.seek(in.length() - CodecUtil.footerLength() - 8);
} else {
in.seek(in.length() - 8);
}
in.seek(in.readLong());
}
private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")");
@ -176,7 +196,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
final byte[] metaLongsBlock;
final byte[] metaBytesBlock;
TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@ -819,4 +839,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
return ramBytesUsed;
}
@Override
public void checkIntegrity() throws IOException {
postingsReader.checkIntegrity();
}
}

View File

@ -73,9 +73,10 @@ import org.apache.lucene.util.fst.Util;
* </p>
*
* <ul>
* <li>TermIndex(.tix) --&gt; Header, TermFST<sup>NumFields</sup></li>
* <li>TermIndex(.tix) --&gt; Header, TermFST<sup>NumFields</sup>, Footer</li>
* <li>TermFST --&gt; {@link FST FST&lt;long&gt;}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
*
* <p>Notes:</p>
@ -103,7 +104,7 @@ import org.apache.lucene.util.fst.Util;
* <ul>
* <li>TermBlock(.tbk) --&gt; Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
* DocCount, LongsSize, DataBlock &gt; <sup>NumFields</sup></li>
* DocCount, LongsSize, DataBlock &gt; <sup>NumFields</sup>, Footer</li>
*
* <li>DataBlock --&gt; StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
@ -119,6 +120,7 @@ import org.apache.lucene.util.fst.Util;
* <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
* LongDelta,--&gt; {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes: </p>
* <ul>
@ -148,7 +150,8 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
static final String TERMS_BLOCK_EXTENSION = "tbk";
static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
public static final int TERMS_VERSION_CHECKSUM = 1;
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
public static final int SKIP_INTERVAL = 8;
final PostingsWriterBase postingsWriter;
@ -218,6 +221,7 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
}
public void close() throws IOException {
if (blockOut != null) {
IOException ioe = null;
try {
final long blockDirStart = blockOut.getFilePointer();
@ -244,10 +248,14 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
field.dict.save(indexOut);
}
writeTrailer(blockOut, blockDirStart);
CodecUtil.writeFooter(indexOut);
CodecUtil.writeFooter(blockOut);
} catch (IOException ioe2) {
ioe = ioe2;
} finally {
IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
blockOut = null;
}
}
}

View File

@ -59,7 +59,7 @@ import org.apache.lucene.codecs.CodecUtil;
* FST-based terms dictionary reader.
*
* The FST directly maps each term and its metadata,
* it is memeory resident.
* it is memory resident.
*
* @lucene.experimental
*/
@ -67,18 +67,21 @@ import org.apache.lucene.codecs.CodecUtil;
public class FSTTermsReader extends FieldsProducer {
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
final IndexInput in;
//static boolean TEST = false;
final int version;
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
this.in = state.directory.openInput(termsFileName, state.context);
final IndexInput in = state.directory.openInput(termsFileName, state.context);
boolean success = false;
try {
readHeader(in);
version = readHeader(in);
if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(in);
}
this.postingsReader.init(in);
seekDir(in);
@ -92,13 +95,15 @@ public class FSTTermsReader extends FieldsProducer {
long sumDocFreq = in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
TermsReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, current, previous);
checkFieldSummary(state.segmentInfo, in, current, previous);
}
success = true;
} finally {
if (!success) {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
@ -110,10 +115,14 @@ public class FSTTermsReader extends FieldsProducer {
FSTTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
in.seek(in.length() - CodecUtil.footerLength() - 8);
} else {
in.seek(in.length() - 8);
}
in.seek(in.readLong());
}
private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
@ -150,7 +159,7 @@ public class FSTTermsReader extends FieldsProducer {
@Override
public void close() throws IOException {
try {
IOUtils.close(in, postingsReader);
IOUtils.close(postingsReader);
} finally {
fields.clear();
}
@ -165,7 +174,7 @@ public class FSTTermsReader extends FieldsProducer {
final int longsSize;
final FST<FSTTermOutputs.TermData> dict;
TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@ -729,4 +738,9 @@ public class FSTTermsReader extends FieldsProducer {
}
return ramBytesUsed;
}
@Override
public void checkIntegrity() throws IOException {
postingsReader.checkIntegrity();
}
}

View File

@ -124,11 +124,12 @@ public class FSTTermsWriter extends FieldsConsumer {
static final String TERMS_EXTENSION = "tmp";
static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
public static final int TERMS_VERSION_CHECKSUM = 1;
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
final IndexOutput out;
IndexOutput out;
final int maxDoc;
final List<FieldMetaData> fields = new ArrayList<>();
@ -199,6 +200,7 @@ public class FSTTermsWriter extends FieldsConsumer {
}
public void close() throws IOException {
if (out != null) {
IOException ioe = null;
try {
// write field summary
@ -217,10 +219,13 @@ public class FSTTermsWriter extends FieldsConsumer {
field.dict.save(out);
}
writeTrailer(out, dirStart);
CodecUtil.writeFooter(out);
} catch (IOException ioe2) {
ioe = ioe2;
} finally {
IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
out = null;
}
}
}

View File

@ -59,7 +59,7 @@ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.UNCOMPRESS
* Writer for {@link MemoryDocValuesFormat}
*/
class MemoryDocValuesConsumer extends DocValuesConsumer {
final IndexOutput data, meta;
IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
@ -208,6 +208,10 @@ class MemoryDocValuesConsumer extends DocValuesConsumer {
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data);
}
success = true;
} finally {
@ -216,6 +220,7 @@ class MemoryDocValuesConsumer extends DocValuesConsumer {
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
data = meta = null;
}
}

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -77,6 +78,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
private final int maxDoc;
private final AtomicLong ramBytesUsed;
private final int version;
static final byte NUMBER = 0;
static final byte BYTES = 1;
@ -91,15 +93,15 @@ class MemoryDocValuesProducer extends DocValuesProducer {
static final int VERSION_START = 0;
static final int VERSION_GCD_COMPRESSION = 1;
static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
static final int VERSION_CHECKSUM = 2;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
boolean success = false;
final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
@ -108,6 +110,11 @@ class MemoryDocValuesProducer extends DocValuesProducer {
binaries = new HashMap<>();
fsts = new HashMap<>();
readFields(in, state.fieldInfos);
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(in);
} else {
CodecUtil.checkEOF(in);
}
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
success = true;
} finally {
@ -208,6 +215,13 @@ class MemoryDocValuesProducer extends DocValuesProducer {
return ramBytesUsed.get();
}
@Override
public void checkIntegrity() throws IOException {
if (version >= VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(data);
}
}
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
data.seek(entry.offset + entry.missingBytes);

View File

@ -25,6 +25,7 @@ import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@ -271,6 +273,9 @@ public final class MemoryPostingsFormat extends PostingsFormat {
}
private static String EXTENSION = "ram";
private static final String CODEC_NAME = "MemoryPostings";
private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START;
private class MemoryFieldsConsumer extends FieldsConsumer implements Closeable {
private final SegmentWriteState state;
@ -279,6 +284,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
private MemoryFieldsConsumer(SegmentWriteState state) throws IOException {
final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
out = state.directory.createOutput(fileName, state.context);
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
this.state = state;
}
@ -403,6 +409,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
// EOF marker:
try {
out.writeVInt(0);
CodecUtil.writeFooter(out);
} finally {
out.close();
}
@ -951,7 +958,8 @@ public final class MemoryPostingsFormat extends PostingsFormat {
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE);
final ChecksumIndexInput in = state.directory.openChecksumInput(fileName, IOContext.READONCE);
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
final SortedMap<String,TermsReader> fields = new TreeMap<>();
@ -965,6 +973,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
// System.out.println("load field=" + termsReader.field.name);
fields.put(termsReader.field.name, termsReader);
}
CodecUtil.checkFooter(in);
} finally {
in.close();
}
@ -1002,6 +1011,9 @@ public final class MemoryPostingsFormat extends PostingsFormat {
}
return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {}
};
}
}

View File

@ -653,4 +653,9 @@ public class PulsingPostingsReader extends PostingsReaderBase {
public long ramBytesUsed() {
return ((wrappedPostingsReader!=null) ? wrappedPostingsReader.ramBytesUsed(): 0);
}
@Override
public void checkIntegrity() throws IOException {
wrappedPostingsReader.checkIntegrity();
}
}

View File

@ -706,4 +706,9 @@ public class SepPostingsReader extends PostingsReaderBase {
public long ramBytesUsed() {
return 0;
}
@Override
public void checkIntegrity() throws IOException {
// TODO: remove sep layout, its fallen behind on features...
}
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
@ -48,6 +49,8 @@ import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -468,4 +471,19 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
public long ramBytesUsed() {
return 0;
}
@Override
public void checkIntegrity() throws IOException {
BytesRef scratch = new BytesRef();
IndexInput clone = data.clone();
clone.seek(0);
ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
while(true) {
SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) {
SimpleTextUtil.checkFooter(input, CHECKSUM);
break;
}
}
}
}

View File

@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
class SimpleTextDocValuesWriter extends DocValuesConsumer {
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TYPE = new BytesRef(" type ");
@ -49,7 +50,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
final static BytesRef NUMVALUES = new BytesRef(" numvalues ");
final static BytesRef ORDPATTERN = new BytesRef(" ordpattern ");
final IndexOutput data;
IndexOutput data;
final BytesRef scratch = new BytesRef();
final int numDocs;
private final Set<String> fieldsSeen = new HashSet<>(); // for asserting
@ -389,12 +390,17 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
@Override
public void close() throws IOException {
if (data != null) {
boolean success = false;
try {
assert !fieldsSeen.isEmpty();
// TODO: sheisty to do this here?
SimpleTextUtil.write(data, END);
SimpleTextUtil.writeNewline(data);
String checksum = Long.toString(data.getChecksum());
SimpleTextUtil.write(data, CHECKSUM);
SimpleTextUtil.write(data, checksum, scratch);
SimpleTextUtil.writeNewline(data);
success = true;
} finally {
if (success) {
@ -402,6 +408,8 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
} else {
IOUtils.closeWhileHandlingException(data);
}
data = null;
}
}
}
}

View File

@ -24,15 +24,14 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
@ -50,7 +49,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
@Override
public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
IndexInput input = directory.openInput(fileName, iocontext);
ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
BytesRef scratch = new BytesRef();
boolean success = false;
@ -130,9 +129,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
infos[i].setDocValuesGen(dvGen);
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
SimpleTextUtil.checkFooter(input, CHECKSUM);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;

View File

@ -58,6 +58,7 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef ATT_KEY = new BytesRef(" key ");
final static BytesRef ATT_VALUE = new BytesRef(" value ");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override
public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
@ -132,6 +133,10 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
}
}
}
String checksum = Long.toString(out.getChecksum());
SimpleTextUtil.write(out, CHECKSUM);
SimpleTextUtil.write(out, checksum, scratch);
SimpleTextUtil.writeNewline(out);
success = true;
} finally {
if (success) {

View File

@ -34,6 +34,8 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@ -51,22 +53,23 @@ import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
class SimpleTextFieldsReader extends FieldsProducer {
private final TreeMap<String,Long> fields;
private final IndexInput in;
private final FieldInfos fieldInfos;
private final int maxDoc;
final static BytesRef END = SimpleTextFieldsWriter.END;
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
final static BytesRef POS = SimpleTextFieldsWriter.POS;
final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET;
final static BytesRef END_OFFSET = SimpleTextFieldsWriter.END_OFFSET;
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
this.maxDoc = state.segmentInfo.getDocCount();
fieldInfos = state.fieldInfos;
@ -83,16 +86,18 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
BytesRef scratch = new BytesRef(10);
TreeMap<String,Long> fields = new TreeMap<>();
while (true) {
SimpleTextUtil.readLine(in, scratch);
SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) {
SimpleTextUtil.checkFooter(input, CHECKSUM);
return fields;
} else if (StringHelper.startsWith(scratch, FIELD)) {
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8);
fields.put(fieldName, in.getFilePointer());
fields.put(fieldName, input.getFilePointer());
}
}
}
@ -669,4 +674,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {}
}

View File

@ -35,10 +35,11 @@ import org.apache.lucene.util.IOUtils;
class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
private final IndexOutput out;
private IndexOutput out;
private final BytesRef scratch = new BytesRef(10);
private final SegmentWriteState writeState;
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TERM = new BytesRef(" term ");
@ -215,11 +216,18 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
@Override
public void close() throws IOException {
if (out != null) {
try {
write(END);
newline();
String checksum = Long.toString(out.getChecksum());
write(CHECKSUM);
write(checksum);
newline();
} finally {
out.close();
out = null;
}
}
}
}

View File

@ -24,9 +24,9 @@ import java.util.Collection;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@ -50,6 +50,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
final static BytesRef SIZE = new BytesRef("size ");
final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef END = new BytesRef("END");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override
public MutableBits newLiveDocs(int size) throws IOException {
@ -69,10 +70,10 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
CharsRef scratchUTF16 = new CharsRef();
String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
IndexInput in = null;
ChecksumIndexInput in = null;
boolean success = false;
try {
in = dir.openInput(fileName, context);
in = dir.openChecksumInput(fileName, context);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, SIZE);
@ -88,6 +89,8 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
SimpleTextUtil.readLine(in, scratch);
}
SimpleTextUtil.checkFooter(in, CHECKSUM);
success = true;
return new SimpleTextBits(bits, size);
} finally {
@ -127,6 +130,10 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
SimpleTextUtil.write(out, END);
SimpleTextUtil.writeNewline(out);
String checksum = Long.toString(out.getChecksum());
SimpleTextUtil.write(out, CHECKSUM);
SimpleTextUtil.write(out, checksum, scratch);
SimpleTextUtil.writeNewline(out);
success = true;
} finally {
if (success) {

View File

@ -17,6 +17,7 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
@ -36,9 +37,9 @@ import java.util.Set;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
@ -55,7 +56,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
BytesRef scratch = new BytesRef();
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
IndexInput input = directory.openInput(segFileName, context);
ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
@ -98,6 +99,8 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
files.add(fileName);
}
SimpleTextUtil.checkFooter(input, SI_CHECKSUM);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics);
info.setFiles(files);

View File

@ -47,6 +47,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_CHECKSUM = new BytesRef(" checksum ");
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
@ -103,6 +104,11 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
SimpleTextUtil.writeNewline(output);
}
}
String checksum = Long.toString(output.getChecksum());
SimpleTextUtil.write(output, SI_CHECKSUM);
SimpleTextUtil.write(output, checksum, scratch);
SimpleTextUtil.writeNewline(output);
success = true;
} finally {
if (!success) {

View File

@ -27,6 +27,8 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -79,15 +81,17 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
// stored fields file in entirety up-front and save the offsets
// so we can seek to the documents later.
private void readIndex(int size) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
offsets = new long[size];
int upto = 0;
while (!scratch.equals(END)) {
readLine();
SimpleTextUtil.readLine(input, scratch);
if (StringHelper.startsWith(scratch, DOC)) {
offsets[upto] = in.getFilePointer();
offsets[upto] = input.getFilePointer();
upto++;
}
}
SimpleTextUtil.checkFooter(input, CHECKSUM);
assert upto == offsets.length;
}
@ -189,6 +193,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
}
private String readString(int offset, BytesRef scratch) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
return scratchUTF16.toString();
}
private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
return a.length == b.length - bOffset &&
ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
@ -198,4 +207,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
public long ramBytesUsed() {
return 0;
}
@Override
public void checkIntegrity() throws IOException {}
}

View File

@ -51,6 +51,7 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
final static BytesRef TYPE_FLOAT = new BytesRef("float");
final static BytesRef TYPE_DOUBLE = new BytesRef("double");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef DOC = new BytesRef("doc ");
final static BytesRef NUM = new BytesRef(" numfields ");
@ -171,6 +172,10 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
}
write(END);
newLine();
String checksum = Long.toString(out.getChecksum());
write(CHECKSUM);
write(checksum);
newLine();
}
@Override

View File

@ -33,6 +33,8 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -82,15 +84,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
private void readIndex(int maxDoc) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
offsets = new long[maxDoc];
int upto = 0;
while (!scratch.equals(END)) {
readLine();
SimpleTextUtil.readLine(input, scratch);
if (StringHelper.startsWith(scratch, DOC)) {
offsets[upto] = in.getFilePointer();
offsets[upto] = input.getFilePointer();
upto++;
}
}
SimpleTextUtil.checkFooter(input, CHECKSUM);
assert upto == offsets.length;
}
@ -537,4 +541,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
public long ramBytesUsed() {
return 0;
}
@Override
public void checkIntegrity() throws IOException {}
}

View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.IOUtils;
*/
public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
static final BytesRef CHECKSUM = new BytesRef("checksum ");
static final BytesRef END = new BytesRef("END");
static final BytesRef DOC = new BytesRef("doc ");
static final BytesRef NUMFIELDS = new BytesRef(" numfields ");
@ -177,6 +178,10 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
}
write(END);
newLine();
String checksum = Long.toString(out.getChecksum());
write(CHECKSUM);
write(checksum);
newLine();
}
@Override

View File

@ -17,11 +17,16 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
class SimpleTextUtil {
@ -67,4 +72,18 @@ class SimpleTextUtil {
scratch.offset = 0;
scratch.length = upto;
}
public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException {
BytesRef scratch = new BytesRef();
String expectedChecksum = Long.toString(input.getChecksum());
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, prefix);
String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString();
if (!expectedChecksum.equals(actualChecksum)) {
throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
}
if (input.length() != input.getFilePointer()) {
throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
}
}
}

View File

@ -132,6 +132,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
}
// verify
if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(indexIn);
}
// Have PostingsReader init itself
postingsReader.init(in);
@ -157,7 +162,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
final int longsSize = version >= BlockTreeTermsWriter.TERMS_VERSION_META_ARRAY ? in.readVInt() : 0;
final int longsSize = version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@ -187,9 +192,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
/** Reads terms file header. */
private int readHeader(IndexInput input) throws IOException {
int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
BlockTreeTermsWriter.TERMS_VERSION_START,
BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) {
BlockTreeTermsWriter.VERSION_START,
BlockTreeTermsWriter.VERSION_CURRENT);
if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
dirOffset = input.readLong();
}
return version;
@ -198,9 +203,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
/** Reads index file header. */
private int readIndexHeader(IndexInput input) throws IOException {
int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
BlockTreeTermsWriter.VERSION_START,
BlockTreeTermsWriter.VERSION_CURRENT);
if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
indexDirOffset = input.readLong();
}
return version;
@ -209,7 +214,10 @@ public class BlockTreeTermsReader extends FieldsProducer {
/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
} else if (version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
@ -2977,4 +2985,15 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
return sizeInByes;
}
@Override
public void checkIntegrity() throws IOException {
if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
// term dictionary
CodecUtil.checksumEntireFile(in);
// postings
postingsReader.checkIntegrity();
}
}
}

View File

@ -109,7 +109,7 @@ import org.apache.lucene.util.packed.PackedInts;
*
* <ul>
* <li>TermsDict (.tim) --&gt; Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
* FieldSummary, DirOffset</li>
* FieldSummary, DirOffset, Footer</li>
* <li>NodeBlock --&gt; (OuterNode | InnerNode)</li>
* <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata</i>&gt;<sup>EntryCount</sup></li>
* <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata ? </i>&gt;<sup>EntryCount</sup></li>
@ -122,6 +122,7 @@ import org.apache.lucene.util.packed.PackedInts;
* FieldNumber,RootCodeLength,DocCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --&gt;
* {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@ -150,12 +151,13 @@ import org.apache.lucene.util.packed.PackedInts;
* when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
* <ul>
* <li>TermsIndex (.tip) --&gt; Header, FSTIndex<sup>NumFields</sup>
* &lt;IndexStartFP&gt;<sup>NumFields</sup>, DirOffset</li>
* &lt;IndexStartFP&gt;<sup>NumFields</sup>, DirOffset, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>IndexStartFP --&gt; {@link DataOutput#writeVLong VLong}</li>
* <!-- TODO: better describe FST output here -->
* <li>FSTIndex --&gt; {@link FST FST&lt;byte[]&gt;}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@ -178,7 +180,6 @@ import org.apache.lucene.util.packed.PackedInts;
* @see BlockTreeTermsReader
* @lucene.experimental
*/
public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
/** Suggested default value for the {@code
@ -204,33 +205,24 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
final static String TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT";
/** Initial terms format. */
public static final int TERMS_VERSION_START = 0;
public static final int VERSION_START = 0;
/** Append-only */
public static final int TERMS_VERSION_APPEND_ONLY = 1;
public static final int VERSION_APPEND_ONLY = 1;
/** Meta data as array */
public static final int TERMS_VERSION_META_ARRAY = 2;
public static final int VERSION_META_ARRAY = 2;
/** checksums */
public static final int VERSION_CHECKSUM = 3;
/** Current terms format. */
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_META_ARRAY;
public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
final static String TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX";
/** Initial index format. */
public static final int TERMS_INDEX_VERSION_START = 0;
/** Append-only */
public static final int TERMS_INDEX_VERSION_APPEND_ONLY = 1;
/** Meta data as array */
public static final int TERMS_INDEX_VERSION_META_ARRAY = 2;
/** Current index format. */
public static final int TERMS_INDEX_VERSION_CURRENT = TERMS_INDEX_VERSION_META_ARRAY;
private final IndexOutput out;
private final IndexOutput indexOut;
final int maxDoc;
@ -326,12 +318,12 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
/** Writes the terms file header. */
private void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
CodecUtil.writeHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT);
}
/** Writes the index file header. */
private void writeIndexHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT);
CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT);
}
/** Writes the terms file trailer. */
@ -1139,13 +1131,13 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
if (TERMS_VERSION_CURRENT >= TERMS_VERSION_META_ARRAY) {
out.writeVInt(field.longsSize);
}
indexOut.writeVLong(field.indexStartFP);
}
writeTrailer(out, dirStart);
CodecUtil.writeFooter(out);
writeIndexTrailer(indexOut, indexDirStart);
CodecUtil.writeFooter(indexOut);
} catch (IOException ioe2) {
ioe = ioe2;
} finally {

View File

@ -23,8 +23,12 @@ import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
/**
@ -43,6 +47,10 @@ public final class CodecUtil {
* Constant to identify the start of a codec header.
*/
public final static int CODEC_MAGIC = 0x3fd76c17;
/**
* Constant to identify the start of a codec footer.
*/
public final static int FOOTER_MAGIC = ~CODEC_MAGIC;
/**
* Writes a codec header, which records both a string to
@ -150,4 +158,119 @@ public final class CodecUtil {
return actualVersion;
}
/**
* Writes a codec footer, which records both a checksum
* algorithm ID and a checksum. This footer can
* be parsed and validated with
* {@link #checkFooter(ChecksumIndexInput) checkFooter()}.
* <p>
* CodecFooter --&gt; Magic,AlgorithmID,Checksum
* <ul>
* <li>Magic --&gt; {@link DataOutput#writeInt Uint32}. This
* identifies the start of the footer. It is always {@value #FOOTER_MAGIC}.
* <li>AlgorithmID --&gt; {@link DataOutput#writeInt Uint32}. This
* indicates the checksum algorithm used. Currently this is always 0,
* for zlib-crc32.
 * <li>Checksum --&gt; {@link DataOutput#writeLong Uint64}. The
* actual checksum value for all previous bytes in the stream, including
* the bytes from Magic and AlgorithmID.
* </ul>
*
* @param out Output stream
* @throws IOException If there is an I/O error writing to the underlying medium.
*/
public static void writeFooter(IndexOutput out) throws IOException {
out.writeInt(FOOTER_MAGIC);
out.writeInt(0);
out.writeLong(out.getChecksum());
}
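For illustration only (this usage sketch is not part of the patch): a codec writer brackets its payload between a header and this footer; the codec name "MyCodec", its version, and the file name "_0.dat" below are hypothetical.

  IndexOutput out = dir.createOutput("_0.dat", IOContext.DEFAULT);   // hypothetical file name
  boolean success = false;
  try {
    CodecUtil.writeHeader(out, "MyCodec", 0);   // hypothetical codec name/version
    out.writeVInt(42);                          // codec-specific payload
    CodecUtil.writeFooter(out);                 // appends magic, algorithm ID and the running crc32
    success = true;
  } finally {
    if (success) {
      IOUtils.close(out);
    } else {
      IOUtils.closeWhileHandlingException(out);
    }
  }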
/**
* Computes the length of a codec footer.
*
* @return length of the entire codec footer.
* @see #writeFooter(IndexOutput)
*/
public static int footerLength() {
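// 4-byte magic + 4-byte algorithmID + 8-byte checksum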
return 16;
}
/**
* Validates the codec footer previously written by {@link #writeFooter}.
* @return actual checksum value
* @throws IOException if the footer is invalid, if the checksum does not match,
* or if {@code in} is not properly positioned before the footer
* at the end of the stream.
*/
public static long checkFooter(ChecksumIndexInput in) throws IOException {
validateFooter(in);
long actualChecksum = in.getChecksum();
long expectedChecksum = in.readLong();
if (expectedChecksum != actualChecksum) {
throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(expectedChecksum) +
" actual=" + Long.toHexString(actualChecksum) +
" (resource=" + in + ")");
}
if (in.getFilePointer() != in.length()) {
throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
}
return actualChecksum;
}
/**
 * Returns (but does not validate) the checksum previously written by {@link #writeFooter}.
* @return actual checksum value
* @throws IOException if the footer is invalid
*/
public static long retrieveChecksum(IndexInput in) throws IOException {
in.seek(in.length() - footerLength());
validateFooter(in);
return in.readLong();
}
private static void validateFooter(IndexInput in) throws IOException {
final int magic = in.readInt();
if (magic != FOOTER_MAGIC) {
throw new CorruptIndexException("codec footer mismatch: actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC + " (resource: " + in + ")");
}
final int algorithmID = in.readInt();
if (algorithmID != 0) {
throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID);
}
}
/**
* Checks that the stream is positioned at the end, and throws exception
* if it is not.
 * @deprecated Use {@link #checkFooter} instead; this should only be used for files without checksums
*/
@Deprecated
public static void checkEOF(IndexInput in) throws IOException {
if (in.getFilePointer() != in.length()) {
throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
}
}
/**
* Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}
* <p>
* Note that this method may be slow, as it must process the entire file.
* If you just need to extract the checksum value, call {@link #retrieveChecksum}.
*/
public static long checksumEntireFile(IndexInput input) throws IOException {
IndexInput clone = input.clone();
clone.seek(0);
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
assert in.getFilePointer() == 0;
final byte[] buffer = new byte[1024];
long bytesToRead = in.length() - footerLength();
for (long skipped = 0; skipped < bytesToRead; ) {
final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length);
in.readBytes(buffer, 0, toRead);
skipped += toRead;
}
return checkFooter(in);
}
}
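A rough sketch (not from the patch) of the three verification paths this class now offers; the directory, file name and codec name are the same hypothetical ones as above.

  // 1. Sequential read: the checksum accumulates while reading, then the footer is validated.
  ChecksumIndexInput in = dir.openChecksumInput("_0.dat", IOContext.READONCE);
  try {
    CodecUtil.checkHeader(in, "MyCodec", 0, 0);
    int payload = in.readVInt();        // consume everything before the footer
    CodecUtil.checkFooter(in);          // magic, algorithm ID and crc32 must all match
  } finally {
    in.close();
  }

  // 2. Cheap: read only the stored checksum, without verifying the preceding bytes.
  IndexInput raw = dir.openInput("_0.dat", IOContext.READONCE);
  long stored = CodecUtil.retrieveChecksum(raw);
  raw.close();

  // 3. Thorough: stream the whole file and verify it, as the checkIntegrity implementations below do.
  IndexInput whole = dir.openInput("_0.dat", IOContext.READONCE);
  CodecUtil.checksumEntireFile(whole);
  whole.close();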

View File

@ -67,6 +67,15 @@ public abstract class DocValuesProducer implements Closeable {
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
/**
* Checks consistency of this producer
* <p>
* Note that this may be costly in terms of I/O, e.g.
* may involve computing a checksum value against large data files.
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
/**
* A simple implementation of {@link DocValuesProducer#getDocsWithField} that
* returns {@code true} if a document has an ordinal &gt;= 0

View File

@ -39,4 +39,13 @@ public abstract class FieldsProducer extends Fields implements Closeable {
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
/**
* Checks consistency of this reader.
* <p>
* Note that this may be costly in terms of I/O, e.g.
* may involve computing a checksum value against large data files.
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
}

View File

@ -72,6 +72,15 @@ public abstract class PostingsReaderBase implements Closeable {
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
/**
* Checks consistency of this reader.
* <p>
* Note that this may be costly in terms of I/O, e.g.
* may involve computing a checksum value against large data files.
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
@Override
public abstract void close() throws IOException;
}

View File

@ -43,4 +43,13 @@ public abstract class StoredFieldsReader implements Cloneable, Closeable {
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
/**
* Checks consistency of this reader.
* <p>
* Note that this may be costly in terms of I/O, e.g.
* may involve computing a checksum value against large data files.
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
}

View File

@ -45,6 +45,15 @@ public abstract class TermVectorsReader implements Cloneable, Closeable {
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
/**
* Checks consistency of this reader.
* <p>
* Note that this may be costly in terms of I/O, e.g.
* may involve computing a checksum value against large data files.
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
/** Create a clone that one caller at a time may use to
* read term vectors. */
@Override

View File

@ -21,6 +21,7 @@ import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.PackedInts;
@ -52,6 +53,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>AvgChunkSize --&gt; the average size of a chunk of compressed documents, as a {@link DataOutput#writeVLong VLong}</li>
* <li>BitsPerStartPointerDelta --&gt; number of bits required to represent a delta from the average using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li>
* <li>StartPointerDeltas --&gt; {@link PackedInts packed} array of BlockChunks elements of BitsPerStartPointerDelta bits each, representing the deltas from the average start pointer using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes</p>
* <ul>
@ -198,6 +200,7 @@ public final class CompressingStoredFieldsIndexWriter implements Closeable {
writeBlock();
}
fieldsIndexOut.writeVInt(0); // end marker
CodecUtil.writeFooter(fieldsIndexOut);
}
@Override

View File

@ -28,6 +28,7 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
@ -48,6 +49,7 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
@ -114,17 +116,20 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
IndexInput indexStream = null;
ChecksumIndexInput indexStream = null;
try {
// Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
if (indexStream.getFilePointer() != indexStream.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")");
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(indexStream);
} else {
CodecUtil.checkEOF(indexStream);
}
indexStream.close();
indexStream = null;
@ -510,4 +515,11 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
return indexReader.ramBytesUsed();
}
@Override
public void checkIntegrity() throws IOException {
if (version >= VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(fieldsStream);
}
}
}

View File

@ -71,7 +71,8 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
static final int VERSION_BIG_CHUNKS = 1;
static final int VERSION_CURRENT = VERSION_BIG_CHUNKS;
static final int VERSION_CHECKSUM = 2;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
private final Directory directory;
private final String segment;
@ -106,9 +107,11 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
this.numBufferedDocs = 0;
boolean success = false;
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION),
context);
try {
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
@ -314,6 +317,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
}
indexWriter.finish(numDocs);
CodecUtil.writeFooter(fieldsStream);
assert bufferedDocs.length == 0;
}

View File

@ -28,6 +28,7 @@ import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM;
import java.io.Closeable;
import java.io.IOException;
@ -48,6 +49,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -69,6 +71,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
private final FieldInfos fieldInfos;
final CompressingStoredFieldsIndexReader indexReader;
final IndexInput vectorsStream;
private final int version;
private final int packedIntsVersion;
private final CompressionMode compressionMode;
private final Decompressor decompressor;
@ -88,6 +91,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
this.chunkSize = reader.chunkSize;
this.numDocs = reader.numDocs;
this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
this.version = reader.version;
this.closed = false;
}
@ -99,17 +103,20 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
IndexInput indexStream = null;
ChecksumIndexInput indexStream = null;
try {
// Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
int version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
if (indexStream.getFilePointer() != indexStream.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")");
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(indexStream);
} else {
CodecUtil.checkEOF(indexStream);
}
indexStream.close();
indexStream = null;
@ -1046,4 +1053,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
return indexReader.ramBytesUsed();
}
@Override
public void checkIntegrity() throws IOException {
if (version >= VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(vectorsStream);
}
}
}

View File

@ -66,7 +66,8 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int VERSION_CHECKSUM = 1;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
static final int BLOCK_SIZE = 64;
@ -220,9 +221,11 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
lastTerm = new BytesRef(ArrayUtil.oversize(30, 1));
boolean success = false;
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION),
context);
try {
vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION),
context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
@ -659,6 +662,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
}
indexWriter.finish(numDocs);
CodecUtil.writeFooter(vectorsStream);
}
@Override

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@ -199,8 +201,11 @@ final class BitVector implements Cloneable, MutableBits {
// set:
public final static int VERSION_DGAPS_CLEARED = 1;
// added checksum
public final static int VERSION_CHECKSUM = 2;
// Increment version to change it:
public final static int VERSION_CURRENT = VERSION_DGAPS_CLEARED;
public final static int VERSION_CURRENT = VERSION_CHECKSUM;
public int getVersion() {
return version;
@ -221,6 +226,7 @@ final class BitVector implements Cloneable, MutableBits {
} else {
writeBits(output);
}
CodecUtil.writeFooter(output);
assert verifyCount();
} finally {
IOUtils.close(output);
@ -324,7 +330,7 @@ final class BitVector implements Cloneable, MutableBits {
<code>d</code>, as written by the {@link #write} method.
*/
public BitVector(Directory d, String name, IOContext context) throws IOException {
IndexInput input = d.openInput(name, context);
ChecksumIndexInput input = d.openChecksumInput(name, context);
try {
final int firstInt = input.readInt();
@ -334,8 +340,8 @@ final class BitVector implements Cloneable, MutableBits {
version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_CURRENT);
size = input.readInt();
} else {
version = VERSION_PRE;
size = firstInt;
// we started writing full header well before 4.0
throw new IndexFormatTooOldException(input.toString(), Integer.toString(firstInt));
}
if (size == -1) {
if (version >= VERSION_DGAPS_CLEARED) {
@ -351,6 +357,11 @@ final class BitVector implements Cloneable, MutableBits {
invertAll();
}
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(input);
} else {
CodecUtil.checkEOF(input);
}
assert verifyCount();
} finally {
input.close();

View File

@ -105,9 +105,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
default:
throw new AssertionError();
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
CodecUtil.checkEOF(input);
success = true;
} finally {
if (success) {
@ -327,9 +325,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
PagedBytes bytes = new PagedBytes(16);
bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount());
final PagedBytes.Reader bytesReader = bytes.freeze(true);
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
CodecUtil.checkEOF(input);
success = true;
ramBytesUsed.addAndGet(bytes.ramBytesUsed());
return new BinaryDocValues() {
@ -367,12 +363,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
bytes.copy(data, totalBytes);
final PagedBytes.Reader bytesReader = bytes.freeze(true);
final PackedInts.Reader reader = PackedInts.getReader(index);
if (data.getFilePointer() != data.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
}
if (index.getFilePointer() != index.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
}
CodecUtil.checkEOF(data);
CodecUtil.checkEOF(index);
success = true;
ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
return new BinaryDocValues() {
@ -414,12 +406,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
bytes.copy(data, fixedLength * (long) valueCount);
final PagedBytes.Reader bytesReader = bytes.freeze(true);
final PackedInts.Reader reader = PackedInts.getReader(index);
if (data.getFilePointer() != data.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
}
if (index.getFilePointer() != index.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
}
CodecUtil.checkEOF(data);
CodecUtil.checkEOF(index);
ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
success = true;
return new BinaryDocValues() {
@ -459,12 +447,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
bytes.copy(data, totalBytes);
final PagedBytes.Reader bytesReader = bytes.freeze(true);
final PackedInts.Reader reader = PackedInts.getReader(index);
if (data.getFilePointer() != data.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
}
if (index.getFilePointer() != index.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
}
CodecUtil.checkEOF(data);
CodecUtil.checkEOF(index);
ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
success = true;
return new BinaryDocValues() {
@ -515,12 +499,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
default:
throw new AssertionError();
}
if (data.getFilePointer() != data.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
}
if (index.getFilePointer() != index.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
}
CodecUtil.checkEOF(data);
CodecUtil.checkEOF(index);
success = true;
} finally {
if (success) {
@ -654,4 +634,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer {
public long ramBytesUsed() {
return ramBytesUsed.get();
}
@Override
public void checkIntegrity() throws IOException {
}
}

View File

@ -107,9 +107,7 @@ class Lucene40FieldInfosReader extends FieldInfosReader {
omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes));
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
CodecUtil.checkEOF(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;

View File

@ -1168,4 +1168,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
return 0;
}
@Override
public void checkIntegrity() throws IOException {}
}

View File

@ -64,9 +64,7 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader {
input.readStringStringMap(); // read deprecated attributes
final Set<String> files = input.readStringSet();
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
CodecUtil.checkEOF(input);
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
si.setFiles(files);

View File

@ -250,4 +250,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
public long ramBytesUsed() {
return 0;
}
@Override
public void checkIntegrity() throws IOException {}
}

View File

@ -760,5 +760,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos
public long ramBytesUsed() {
return 0;
}
@Override
public void checkIntegrity() throws IOException {}
}

View File

@ -132,6 +132,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>Header, --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedBlockSize, SingletonDocID --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --&gt; {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@ -190,7 +191,7 @@ import org.apache.lucene.util.packed.PackedInts;
* each packed or VInt block, when the length of document list is larger than packed block size.</p>
*
* <ul>
* <li>docFile(.doc) --&gt; Header, &lt;TermFreqs, SkipData?&gt;<sup>TermCount</sup></li>
* <li>docFile(.doc) --&gt; Header, &lt;TermFreqs, SkipData?&gt;<sup>TermCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermFreqs --&gt; &lt;PackedBlock&gt; <sup>PackedDocBlockNum</sup>,
* VIntBlock? </li>
@ -206,6 +207,7 @@ import org.apache.lucene.util.packed.PackedInts;
* --&gt;
* {@link DataOutput#writeVInt VInt}</li>
* <li>SkipChildLevelPointer --&gt; {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@ -273,7 +275,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>The .pos file contains the lists of positions that each term occurs at within documents. It also
* sometimes stores part of payloads and offsets for speedup.</p>
* <ul>
* <li>PosFile(.pos) --&gt; Header, &lt;TermPositions&gt; <sup>TermCount</sup></li>
* <li>PosFile(.pos) --&gt; Header, &lt;TermPositions&gt; <sup>TermCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermPositions --&gt; &lt;PackedPosDeltaBlock&gt; <sup>PackedPosBlockNum</sup>,
* VIntBlock? </li>
@ -283,6 +285,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>PositionDelta, OffsetDelta, OffsetLength --&gt;
* {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadData --&gt; {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@ -325,13 +328,14 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>The .pay file will store payloads and offsets associated with certain term-document positions.
* Some payloads and offsets will be separated out into .pos file, for performance reasons.</p>
* <ul>
* <li>PayFile(.pay): --&gt; Header, &lt;TermPayloads, TermOffsets?&gt; <sup>TermCount</sup></li>
* <li>PayFile(.pay): --&gt; Header, &lt;TermPayloads, TermOffsets?&gt; <sup>TermCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermPayloads --&gt; &lt;PackedPayLengthBlock, SumPayLength, PayData&gt; <sup>PackedPayBlockNum</sup>
* <li>TermOffsets --&gt; &lt;PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock&gt; <sup>PackedPayBlockNum</sup>
* <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --&gt; {@link PackedInts PackedInts}</li>
* <li>SumPayLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayData --&gt; {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>

View File

@ -35,7 +35,6 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@ -1547,4 +1546,18 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
return 0;
}
@Override
public void checkIntegrity() throws IOException {
if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
if (docIn != null) {
CodecUtil.checksumEntireFile(docIn);
}
if (posIn != null) {
CodecUtil.checksumEntireFile(posIn);
}
if (payIn != null) {
CodecUtil.checksumEntireFile(payIn);
}
}
}
}

View File

@ -64,11 +64,12 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
// Increment version to change it
final static int VERSION_START = 0;
final static int VERSION_META_ARRAY = 1;
final static int VERSION_CURRENT = VERSION_META_ARRAY;
final static int VERSION_CHECKSUM = 2;
final static int VERSION_CURRENT = VERSION_CHECKSUM;
final IndexOutput docOut;
final IndexOutput posOut;
final IndexOutput payOut;
IndexOutput docOut;
IndexOutput posOut;
IndexOutput payOut;
final static IntBlockTermState emptyState = new IntBlockTermState();
IntBlockTermState lastState;
@ -569,6 +570,26 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
@Override
public void close() throws IOException {
// TODO: add a finish() at least to PushBase? DV too...?
boolean success = false;
try {
if (docOut != null) {
CodecUtil.writeFooter(docOut);
}
if (posOut != null) {
CodecUtil.writeFooter(posOut);
}
if (payOut != null) {
CodecUtil.writeFooter(payOut);
}
success = true;
} finally {
if (success) {
IOUtils.close(docOut, posOut, payOut);
} else {
IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
}
docOut = posOut = payOut = null;
}
}
}

View File

@ -68,7 +68,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
* <p>The DocValues metadata or .dvm file.</p>
* <p>For DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;FieldNumber,EntryType,Entry&gt;<sup>NumFields</sup></p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;FieldNumber,EntryType,Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry</li>
* <li>NumericEntry --&gt; DataOffset,CompressionType,PackedVersion</li>
@ -78,6 +78,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
* <li>DataOffset,DataLength --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>EntryType,CompressionType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
@ -105,7 +106,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup></p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
@ -114,6 +115,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
* <li>TableCompressedNumerics --&gt; TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li>
* <li>UncompressedNumerics --&gt; {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
 * sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -64,6 +65,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
private final Map<Integer,BinaryEntry> binaries;
private final Map<Integer,FSTEntry> fsts;
private final IndexInput data;
private final int version;
// ram instances we have already loaded
private final Map<Integer,NumericDocValues> numericInstances =
@ -89,16 +91,16 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
static final int VERSION_START = 0;
static final int VERSION_GCD_COMPRESSION = 1;
static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
static final int VERSION_CHECKSUM = 2;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
boolean success = false;
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
@ -108,8 +110,10 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
fsts = new HashMap<>();
readFields(in, state.fieldInfos);
if (in.getFilePointer() != in.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
if (version >= VERSION_CHECKSUM) {
CodecUtil.checkFooter(in);
} else {
CodecUtil.checkEOF(in);
}
success = true;
@ -199,6 +203,13 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
return ramBytesUsed.get();
}
@Override
public void checkIntegrity() throws IOException {
if (version >= VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(data);
}
}
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
data.seek(entry.offset);

View File

@ -92,9 +92,7 @@ final class Lucene42FieldInfosReader extends FieldInfosReader {
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
CodecUtil.checkEOF(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;

View File

@ -34,14 +34,12 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT;
/**
* Writer for {@link Lucene42NormsFormat}
*/
class Lucene42NormsConsumer extends DocValuesConsumer {
static final int VERSION_START = 0;
static final int VERSION_GCD_COMPRESSION = 1;
static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
static final byte NUMBER = 0;
static final int BLOCK_SIZE = 4096;
@ -51,7 +49,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
static final byte UNCOMPRESSED = 2;
static final byte GCD_COMPRESSED = 3;
final IndexOutput data, meta;
IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
@ -181,6 +179,10 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data); // write checksum
}
success = true;
} finally {
@ -189,6 +191,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
meta = data = null;
}
}

View File

@ -59,7 +59,7 @@ import org.apache.lucene.util.packed.PackedInts;
* {@link BlockPackedWriter blocks of packed ints} for positions.</p>
* <p>Here is a more detailed description of the field data file format:</p>
* <ul>
* <li>VectorData (.tvd) --&gt; &lt;Header&gt;, PackedIntsVersion, ChunkSize, &lt;Chunk&gt;<sup>ChunkCount</sup></li>
* <li>VectorData (.tvd) --&gt; &lt;Header&gt;, PackedIntsVersion, ChunkSize, &lt;Chunk&gt;<sup>ChunkCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedIntsVersion --&gt; {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
* <li>ChunkSize is the number of bytes of terms to accumulate before flushing, as a {@link DataOutput#writeVInt VInt}</li>
@ -107,14 +107,16 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>FieldTermsAndPayLoads --&gt; Terms (Payloads)</li>
* <li>Terms: term bytes</li>
* <li>Payloads: payload bytes (if the field has payloads)</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </li>
* <li><a name="vector_index" id="vector_index"></a>
* <p>An index file (extension <tt>.tvx</tt>).</p>
* <ul>
* <li>VectorIndex (.tvx) --&gt; &lt;Header&gt;, &lt;ChunkIndex&gt;</li>
* <li>VectorIndex (.tvx) --&gt; &lt;Header&gt;, &lt;ChunkIndex&gt;, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </li>
* </ol>

View File

@ -66,7 +66,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
* of indirection: docId -> ord. */
public static final int SORTED_SET_SINGLE_VALUED_SORTED = 1;
final IndexOutput data, meta;
IndexOutput data, meta;
final int maxDoc;
/** expert: Creates a new writer */
@ -438,6 +438,10 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data); // write checksum
}
success = true;
} finally {
@ -446,6 +450,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
meta = data = null;
}
}
}

View File

@ -89,7 +89,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>The DocValues metadata or .dvm file.</p>
* <p>For DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup></p>
* <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li>
* <li>NumericEntry --&gt; GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
@ -109,6 +109,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>TableSize --&gt; {@link DataOutput#writeVInt vInt}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
@ -138,10 +139,13 @@ import org.apache.lucene.util.packed.PackedInts;
* is written for the addresses.
* <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
 * If it's -1, then there are no missing values.
* <p>Checksum contains the CRC32 checksum of all bytes in the .dvm file up
* until the checksum. This is used to verify integrity of the file on opening the
* index.
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup></p>
* <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
@ -150,6 +154,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>TableCompressedNumerics --&gt; {@link PackedInts PackedInts}</li>
* <li>GCDCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
 * sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
@ -179,7 +184,8 @@ public final class Lucene45DocValuesFormat extends DocValuesFormat {
static final String META_EXTENSION = "dvm";
static final int VERSION_START = 0;
static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1;
static final int VERSION_CURRENT = VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED;
static final int VERSION_CHECKSUM = 2;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
static final byte NUMERIC = 0;
static final byte BINARY = 1;
static final byte SORTED = 2;

View File

@ -50,6 +50,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -80,7 +81,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
this.maxDoc = state.segmentInfo.getDocCount();
boolean success = false;
try {
@ -94,8 +95,10 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
sortedSets = new HashMap<>();
readFields(in, state.fieldInfos);
if (in.getFilePointer() != in.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
CodecUtil.checkFooter(in);
} else {
CodecUtil.checkEOF(in);
}
success = true;
@ -299,6 +302,13 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
return ramBytesUsed.get();
}
@Override
public void checkIntegrity() throws IOException {
if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(data);
}
}
LongValues getNumeric(NumericEntry entry) throws IOException {
final IndexInput data = this.data.clone();
data.seek(entry.offset);

View File

@ -32,7 +32,7 @@ import org.apache.lucene.store.DataOutput;
* <p>
* <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
* <p>FieldInfos (.fnm) --&gt; Header,FieldsCount, &lt;FieldName,FieldNumber,
* FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup></p>
* FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup>,Footer</p>
* <p>Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#checkHeader CodecHeader}</li>
@ -42,6 +42,7 @@ import org.apache.lucene.store.DataOutput;
* <li>FieldNumber --&gt; {@link DataOutput#writeInt VInt}</li>
* <li>Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>DocValuesGen --&gt; {@link DataOutput#writeLong(long) Int64}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
@ -113,7 +114,8 @@ public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
// Codec header
static final String CODEC_NAME = "Lucene46FieldInfos";
static final int FORMAT_START = 0;
static final int FORMAT_CURRENT = FORMAT_START;
static final int FORMAT_CHECKSUM = 1;
static final int FORMAT_CURRENT = FORMAT_CHECKSUM;
// Field flags
static final byte IS_INDEXED = 0x1;

View File

@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -49,11 +50,11 @@ final class Lucene46FieldInfosReader extends FieldInfosReader {
@Override
public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
IndexInput input = directory.openInput(fileName, context);
ChecksumIndexInput input = directory.openChecksumInput(fileName, context);
boolean success = false;
try {
CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
Lucene46FieldInfosFormat.FORMAT_START,
Lucene46FieldInfosFormat.FORMAT_CURRENT);
@ -92,8 +93,10 @@ final class Lucene46FieldInfosReader extends FieldInfosReader {
infos[i].setDocValuesGen(dvGen);
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
CodecUtil.checkFooter(input);
} else {
CodecUtil.checkEOF(input);
}
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;

View File

@ -26,9 +26,9 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
@ -81,6 +81,7 @@ final class Lucene46FieldInfosWriter extends FieldInfosWriter {
output.writeLong(fi.getDocValuesGen());
output.writeStringStringMap(fi.attributes());
}
CodecUtil.writeFooter(output);
success = true;
} finally {
if (success) {

View File

@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
* <p>
* Files:
* <ul>
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
* </ul>
* </p>
* Data types:
@ -43,6 +43,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
* <li>Files --&gt; {@link DataOutput#writeStringSet Set&lt;String&gt;}</li>
* <li>Diagnostics --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
@ -53,9 +54,6 @@ import org.apache.lucene.store.DataOutput; // javadocs
* <li>IsCompoundFile records whether the segment is written as a compound file or
* not. If this is -1, the segment is not a compound file. If it is 1, the segment
* is a compound file.</li>
* <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
* until the checksum. This is used to verify integrity of the file on opening the
* index.</li>
* <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
* for each segment it creates. It includes metadata like the current Lucene
* version, OS, Java version, why the segment was created (merge, flush,
@ -89,5 +87,6 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene46SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int VERSION_CHECKSUM = 1;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
}

View File

@ -26,9 +26,9 @@ import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
/**
@ -46,10 +46,10 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
@Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
final IndexInput input = dir.openInput(fileName, context);
final ChecksumIndexInput input = dir.openChecksumInput(fileName, context);
boolean success = false;
try {
CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
Lucene46SegmentInfoFormat.VERSION_START,
Lucene46SegmentInfoFormat.VERSION_CURRENT);
final String version = input.readString();
@ -61,8 +61,10 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
CodecUtil.checkFooter(input);
} else {
CodecUtil.checkEOF(input);
}
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);

View File

@ -59,7 +59,7 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringSet(si.files());
CodecUtil.writeFooter(output);
success = true;
} finally {
if (!success) {

View File

@ -383,6 +383,9 @@ on multi-valued fields.</li>
<li>In version 4.5, DocValues were extended to explicitly represent missing values.</li>
<li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
allow updating NumericDocValues fields.</li>
<li>In version 4.8, checksum footers were added to the end of each index file
for improved data integrity. Specifically, the last 8 bytes of every index file
contain the zlib-crc32 checksum of the file.</li>
</ul>
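The 4.8 footer described in the last bullet can also be verified directly from application code. A minimal, hedged sketch in Java, assuming an already-open Directory named dir and a known fileName, relying only on the CodecUtil.checksumEntireFile helper introduced by this change:

// Sketch: verify the trailing CRC-32 footer of a single 4.8+ index file.
// "dir" and "fileName" are placeholders supplied by the caller.
IndexInput in = dir.openInput(fileName, IOContext.READONCE);
try {
  CodecUtil.checksumEntireFile(in); // throws CorruptIndexException if the stored checksum does not match
} finally {
  in.close();
}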
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>

View File

@ -310,6 +310,13 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
}
return size;
}
@Override
public void checkIntegrity() throws IOException {
for (DocValuesProducer format : formats.values()) {
format.checkIntegrity();
}
}
}
@Override

View File

@ -246,6 +246,13 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
}
return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {
for (FieldsProducer producer : formats.values()) {
producer.checkIntegrity();
}
}
}
@Override

View File

@ -238,4 +238,13 @@ public abstract class AtomicReader extends IndexReader {
* synchronization.
*/
public abstract Bits getLiveDocs();
/**
* Checks consistency of this reader.
* <p>
* Note that this may be costly in terms of I/O, e.g.
* may involve computing a checksum value against large data files.
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
}
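Since checkIntegrity is declared per leaf reader, composite readers are typically walked leaf by leaf. A hedged usage sketch, assuming the caller already holds an open DirectoryReader named reader:

// Sketch: run the new integrity check over every segment of an open reader.
for (AtomicReaderContext ctx : reader.leaves()) {
  ctx.reader().checkIntegrity(); // corruption surfaces as CorruptIndexException (an IOException)
}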

View File

@ -537,6 +537,10 @@ public class CheckIndex {
segInfoStat.openReaderPassed = true;
if (infoStream != null)
infoStream.print(" test: check integrity.........");
reader.checkIntegrity();
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
if (reader.hasDeletions()) {

View File

@ -423,4 +423,9 @@ public class FilterAtomicReader extends AtomicReader {
return in.getDocsWithField(field);
}
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
in.checkIntegrity();
}
}

View File

@ -2651,7 +2651,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
false, codec, null);
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
MergeState.CheckAbort.NONE, globalFieldNumberMap,
context, config.getCheckIntegrityAtMerge());
if (!merger.shouldMerge()) {
return;
@ -4051,7 +4052,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
// OneMerge to return a view over the actual segments to merge
final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
merge.info.info, infoStream, dirWrapper,
checkAbort, globalFieldNumberMap, context);
checkAbort, globalFieldNumberMap,
context, config.getCheckIntegrityAtMerge());
merge.checkAborted(directory);

View File

@ -110,6 +110,12 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
* (set to <code>true</code>). For batch indexing with very large
* ram buffers use <code>false</code> */
public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true;
/** Default value for calling {@link AtomicReader#checkIntegrity()} before
* merging segments (set to <code>false</code>). You can set this
* to <code>true</code> for additional safety. */
public final static boolean DEFAULT_CHECK_INTEGRITY_AT_MERGE = false;
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).

View File

@ -98,6 +98,9 @@ public class LiveIndexWriterConfig {
/** True if segment flushes should use compound file format */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
/** True if merging should check integrity of segments before merge */
protected volatile boolean checkIntegrityAtMerge = IndexWriterConfig.DEFAULT_CHECK_INTEGRITY_AT_MERGE;
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
this.analyzer = analyzer;
@ -152,6 +155,7 @@ public class LiveIndexWriterConfig {
flushPolicy = config.getFlushPolicy();
perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
useCompoundFile = config.getUseCompoundFile();
checkIntegrityAtMerge = config.getCheckIntegrityAtMerge();
}
/** Returns the default analyzer to use for indexing documents. */
@ -475,6 +479,26 @@ public class LiveIndexWriterConfig {
return useCompoundFile ;
}
/**
* Sets if {@link IndexWriter} should call {@link AtomicReader#checkIntegrity()}
* on existing segments before merging them into a new one.
* <p>
* Use <code>true</code> to enable this safety check, which can help
* reduce the risk of propagating index corruption from older segments
* into new ones, at the expense of slower merging.
* </p>
*/
public LiveIndexWriterConfig setCheckIntegrityAtMerge(boolean checkIntegrityAtMerge) {
this.checkIntegrityAtMerge = checkIntegrityAtMerge;
return this;
}
/** Returns true if {@link AtomicReader#checkIntegrity()} is called before
* merging segments. */
public boolean getCheckIntegrityAtMerge() {
return checkIntegrityAtMerge;
}
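For illustration, a minimal sketch of opting into the new pre-merge safety check when configuring a writer; dir, analyzer, and the version constant are placeholders, and the check stays disabled unless turned on explicitly:

// Sketch: call AtomicReader.checkIntegrity() on segments before every merge (default is false).
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
iwc.setCheckIntegrityAtMerge(true);
IndexWriter writer = new IndexWriter(dir, iwc);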
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@ -499,6 +523,7 @@ public class LiveIndexWriterConfig {
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
sb.append("checkIntegrityAtMerge=").append(getCheckIntegrityAtMerge()).append("\n");
return sb.toString();
}

View File

@ -299,4 +299,12 @@ public class ParallelAtomicReader extends AtomicReader {
NumericDocValues values = reader == null ? null : reader.getNormValues(field);
return values;
}
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
for (AtomicReader reader : completeReaderSet) {
reader.checkIntegrity();
}
}
}

View File

@ -95,14 +95,4 @@ final class SegmentDocValues {
IOUtils.reThrow(t);
}
}
/** Returns approximate RAM bytes used. */
synchronized long ramBytesUsed() {
long ramBytesUsed = 0;
for (RefCount<DocValuesProducer> dvp : genDVProducers.values()) {
ramBytesUsed += dvp.get().ramBytesUsed();
}
return ramBytesUsed;
}
}

View File

@ -36,11 +36,9 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.IOUtils;
@ -69,10 +67,10 @@ import org.apache.lucene.util.IOUtils;
* <p>
* Files:
* <ul>
* <li><tt>segments.gen</tt>: GenHeader, Generation, Generation
* <li><tt>segments.gen</tt>: GenHeader, Generation, Generation, Footer
* <li><tt>segments_N</tt>: Header, Version, NameCounter, SegCount,
* &lt;SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles&gt;<sup>SegCount</sup>,
* CommitUserData, Checksum
* CommitUserData, Footer
* </ul>
* </p>
* Data types:
@ -84,6 +82,7 @@ import org.apache.lucene.util.IOUtils;
* <li>SegName, SegCodec --&gt; {@link DataOutput#writeString String}</li>
* <li>CommitUserData --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>UpdatesFiles --&gt; {@link DataOutput#writeStringSet(Set) Set&lt;String&gt;}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
@ -98,9 +97,6 @@ import org.apache.lucene.util.IOUtils;
* there are no deletes. Anything above zero means there are deletes
* stored by {@link LiveDocsFormat}.</li>
* <li>DeletionCount records the number of deleted documents in this segment.</li>
* <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
* until the checksum. This is used to verify integrity of the file on opening the
* index.</li>
* <li>SegCodec is the {@link Codec#getName() name} of the Codec that encoded
* this segment.</li>
* <li>CommitUserData stores an optional user-supplied opaque
@ -123,9 +119,16 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/** The file format version for the segments_N codec header, since 4.6+. */
public static final int VERSION_46 = 1;
/** Used for the segments.gen file only!
* Whenever you add a new format, make it 1 smaller (negative version logic)! */
public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
/** The file format version for the segments_N codec header, since 4.8+ */
public static final int VERSION_48 = 2;
// Used for the segments.gen file only!
// Whenever you add a new format, make it 1 smaller (negative version logic)!
private static final int FORMAT_SEGMENTS_GEN_47 = -2;
private static final int FORMAT_SEGMENTS_GEN_CHECKSUM = -3;
private static final int FORMAT_SEGMENTS_GEN_START = FORMAT_SEGMENTS_GEN_47;
/** Current format of segments.gen */
public static final int FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM;
/** Used to name new segments. */
public int counter;
@ -266,6 +269,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT);
genOutput.writeLong(generation);
genOutput.writeLong(generation);
CodecUtil.writeFooter(genOutput);
} finally {
genOutput.close();
dir.sync(Collections.singleton(IndexFileNames.SEGMENTS_GEN));
@ -317,7 +321,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
lastGeneration = generation;
ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ);
try {
// NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
// to read the magic ourselves.
@ -326,7 +330,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
}
// 4.0+
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_46);
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_48);
version = input.readLong();
counter = input.readInt();
int numSegments = input.readInt();
@ -366,11 +370,16 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
}
userData = input.readStringStringMap();
if (format >= VERSION_48) {
CodecUtil.checkFooter(input);
} else {
final long checksumNow = input.getChecksum();
final long checksumThen = input.readLong();
if (checksumNow != checksumThen) {
throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
}
CodecUtil.checkEOF(input);
}
success = true;
} finally {
@ -402,7 +411,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
// Only non-null after prepareCommit has been called and
// before finishCommit is called
ChecksumIndexOutput pendingSegnOutput;
IndexOutput pendingSegnOutput;
private void write(Directory directory) throws IOException {
@ -415,12 +424,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
generation++;
}
ChecksumIndexOutput segnOutput = null;
IndexOutput segnOutput = null;
boolean success = false;
try {
segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName, IOContext.DEFAULT));
CodecUtil.writeHeader(segnOutput, "segments", VERSION_46);
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
CodecUtil.writeHeader(segnOutput, "segments", VERSION_48);
segnOutput.writeLong(version);
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
@ -641,9 +650,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
// a stale cache (NFS) we have a better chance of
// getting the right generation.
long genB = -1;
IndexInput genInput = null;
ChecksumIndexInput genInput = null;
try {
genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
genInput = directory.openChecksumInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
} catch (IOException e) {
if (infoStream != null) {
message("segments.gen open: IOException " + e);
@ -653,18 +662,23 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
if (genInput != null) {
try {
int version = genInput.readInt();
if (version == FORMAT_SEGMENTS_GEN_CURRENT) {
if (version == FORMAT_SEGMENTS_GEN_47 || version == FORMAT_SEGMENTS_GEN_CHECKSUM) {
long gen0 = genInput.readLong();
long gen1 = genInput.readLong();
if (infoStream != null) {
message("fallback check: " + gen0 + "; " + gen1);
}
if (version == FORMAT_SEGMENTS_GEN_CHECKSUM) {
CodecUtil.checkFooter(genInput);
} else {
CodecUtil.checkEOF(genInput);
}
if (gen0 == gen1) {
// The file is consistent.
genB = gen0;
}
} else {
throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT);
throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_START, FORMAT_SEGMENTS_GEN_CURRENT);
}
} catch (IOException err2) {
// rethrow any format exception
@ -863,7 +877,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
}
boolean success = false;
try {
pendingSegnOutput.finishCommit();
CodecUtil.writeFooter(pendingSegnOutput);
success = true;
} finally {
if (!success) {

View File

@ -52,7 +52,13 @@ final class SegmentMerger {
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException {
// validate incoming readers
if (validate) {
for (AtomicReader reader : readers) {
reader.checkIntegrity();
}
}
mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
directory = dir;
this.codec = segmentInfo.getCodec();

View File

@ -33,10 +33,13 @@ import org.apache.lucene.util.IOUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* IndexReader implementation over a single segment.
@ -72,7 +75,8 @@ public final class SegmentReader extends AtomicReader {
}
};
final Map<String,DocValuesProducer> dvProducers = new HashMap<>();
final Map<String,DocValuesProducer> dvProducersByField = new HashMap<>();
final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
final FieldInfos fieldInfos;
@ -177,12 +181,15 @@ public final class SegmentReader extends AtomicReader {
// System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gens=" + genInfos.keySet());
// TODO: can we avoid iterating over fieldinfos several times and creating maps of all this stuff if dv updates do not exist?
for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
Long gen = e.getKey();
List<FieldInfo> infos = e.getValue();
DocValuesProducer dvp = segDocValues.getDocValuesProducer(gen, si, IOContext.READ, dir, dvFormat, infos);
for (FieldInfo fi : infos) {
dvProducers.put(fi.name, dvp);
dvProducersByField.put(fi.name, dvp);
dvProducers.add(dvp);
}
}
@ -250,7 +257,7 @@ public final class SegmentReader extends AtomicReader {
try {
core.decRef();
} finally {
dvProducers.clear();
dvProducersByField.clear();
try {
IOUtils.close(docValuesLocal, docsWithFieldLocal);
} finally {
@ -395,13 +402,12 @@ public final class SegmentReader extends AtomicReader {
return null;
}
DocValuesProducer dvProducer = dvProducers.get(field);
assert dvProducer != null;
Map<String,Object> dvFields = docValuesLocal.get();
NumericDocValues dvs = (NumericDocValues) dvFields.get(field);
if (dvs == null) {
DocValuesProducer dvProducer = dvProducersByField.get(field);
assert dvProducer != null;
dvs = dvProducer.getNumeric(fi);
dvFields.put(field, dvs);
}
@ -422,13 +428,12 @@ public final class SegmentReader extends AtomicReader {
return null;
}
DocValuesProducer dvProducer = dvProducers.get(field);
assert dvProducer != null;
Map<String,Bits> dvFields = docsWithFieldLocal.get();
Bits dvs = dvFields.get(field);
if (dvs == null) {
DocValuesProducer dvProducer = dvProducersByField.get(field);
assert dvProducer != null;
dvs = dvProducer.getDocsWithField(fi);
dvFields.put(field, dvs);
}
@ -444,13 +449,12 @@ public final class SegmentReader extends AtomicReader {
return null;
}
DocValuesProducer dvProducer = dvProducers.get(field);
assert dvProducer != null;
Map<String,Object> dvFields = docValuesLocal.get();
BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field);
if (dvs == null) {
DocValuesProducer dvProducer = dvProducersByField.get(field);
assert dvProducer != null;
dvs = dvProducer.getBinary(fi);
dvFields.put(field, dvs);
}
@ -466,13 +470,12 @@ public final class SegmentReader extends AtomicReader {
return null;
}
DocValuesProducer dvProducer = dvProducers.get(field);
assert dvProducer != null;
Map<String,Object> dvFields = docValuesLocal.get();
SortedDocValues dvs = (SortedDocValues) dvFields.get(field);
if (dvs == null) {
DocValuesProducer dvProducer = dvProducersByField.get(field);
assert dvProducer != null;
dvs = dvProducer.getSorted(fi);
dvFields.put(field, dvs);
}
@ -488,13 +491,12 @@ public final class SegmentReader extends AtomicReader {
return null;
}
DocValuesProducer dvProducer = dvProducers.get(field);
assert dvProducer != null;
Map<String,Object> dvFields = docValuesLocal.get();
SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field);
if (dvs == null) {
DocValuesProducer dvProducer = dvProducersByField.get(field);
assert dvProducer != null;
dvs = dvProducer.getSortedSet(fi);
dvFields.put(field, dvs);
}
@ -548,12 +550,45 @@ public final class SegmentReader extends AtomicReader {
public long ramBytesUsed() {
ensureOpen();
long ramBytesUsed = 0;
if (segDocValues != null) {
ramBytesUsed += segDocValues.ramBytesUsed();
if (dvProducers != null) {
for (DocValuesProducer producer : dvProducers) {
ramBytesUsed += producer.ramBytesUsed();
}
}
if (core != null) {
ramBytesUsed += core.ramBytesUsed();
}
return ramBytesUsed;
}
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
// stored fields
getFieldsReader().checkIntegrity();
// term vectors
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader != null) {
termVectorsReader.checkIntegrity();
}
// terms/postings
if (core.fields != null) {
core.fields.checkIntegrity();
}
// norms
if (core.normsProducer != null) {
core.normsProducer.checkIntegrity();
}
// docvalues
if (dvProducers != null) {
for (DocValuesProducer producer : dvProducers) {
producer.checkIntegrity();
}
}
}
}

View File

@ -239,4 +239,12 @@ public final class SlowCompositeReaderWrapper extends AtomicReader {
// TODO: as this is a wrapper, should we really close the delegate?
in.close();
}
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
for (AtomicReaderContext ctx : in.leaves()) {
ctx.reader().checkIntegrity();
}
}
}

View File

@ -0,0 +1,84 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.zip.Checksum;
/**
* Wraps another {@link Checksum} with an internal buffer
* to speed up checksum calculations.
*/
public class BufferedChecksum implements Checksum {
private final Checksum in;
private final byte buffer[];
private int upto;
/** Default buffer size: 256 */
public static final int DEFAULT_BUFFERSIZE = 256;
/** Create a new BufferedChecksum with {@link #DEFAULT_BUFFERSIZE} */
public BufferedChecksum(Checksum in) {
this(in, DEFAULT_BUFFERSIZE);
}
/** Create a new BufferedChecksum with the specified bufferSize */
public BufferedChecksum(Checksum in, int bufferSize) {
this.in = in;
this.buffer = new byte[bufferSize];
}
@Override
public void update(int b) {
if (upto == buffer.length) {
flush();
}
buffer[upto++] = (byte) b;
}
@Override
public void update(byte[] b, int off, int len) {
if (len >= buffer.length) {
flush();
in.update(b, off, len);
} else {
if (upto + len > buffer.length) {
flush();
}
System.arraycopy(b, off, buffer, upto, len);
upto += len;
}
}
@Override
public long getValue() {
flush();
return in.getValue();
}
@Override
public void reset() {
upto = 0;
in.reset();
}
private void flush() {
if (upto > 0) {
in.update(buffer, 0, upto);
}
upto = 0;
}
}
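A short usage sketch, mirroring the unit test added later in this change; someBytes is a placeholder buffer. Because BufferedChecksum implements Checksum, it is a drop-in replacement for a bare CRC32 and only the construction changes:

// Sketch: buffer small updates before forwarding them to the wrapped CRC32.
Checksum crc = new BufferedChecksum(new CRC32());
crc.update(someBytes, 0, someBytes.length);
long value = crc.getValue(); // same value an unbuffered CRC32 would produce for these bytes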

View File

@ -21,41 +21,40 @@ import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
/** Writes bytes through to a primary IndexOutput, computing
* checksum.
*
* @lucene.internal
/**
* Simple implementation of {@link ChecksumIndexInput} that wraps
* another input and delegates calls.
*/
public class ChecksumIndexOutput extends IndexOutput {
IndexOutput main;
Checksum digest;
public class BufferedChecksumIndexInput extends ChecksumIndexInput {
final IndexInput main;
final Checksum digest;
public ChecksumIndexOutput(IndexOutput main) {
/** Creates a new BufferedChecksumIndexInput */
public BufferedChecksumIndexInput(IndexInput main) {
super("BufferedChecksumIndexInput(" + main + ")");
this.main = main;
digest = new CRC32();
this.digest = new BufferedChecksum(new CRC32());
}
@Override
public void writeByte(byte b) throws IOException {
public byte readByte() throws IOException {
final byte b = main.readByte();
digest.update(b);
main.writeByte(b);
return b;
}
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
digest.update(b, offset, length);
main.writeBytes(b, offset, length);
public void readBytes(byte[] b, int offset, int len)
throws IOException {
main.readBytes(b, offset, len);
digest.update(b, offset, len);
}
@Override
public long getChecksum() {
return digest.getValue();
}
@Override
public void flush() throws IOException {
main.flush();
}
@Override
public void close() throws IOException {
main.close();
@ -66,13 +65,8 @@ public class ChecksumIndexOutput extends IndexOutput {
return main.getFilePointer();
}
/** writes the checksum */
public void finishCommit() throws IOException {
main.writeLong(getChecksum());
}
@Override
public long length() throws IOException {
public long length() {
return main.length();
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.store;
*/
import java.io.IOException;
import java.util.zip.CRC32;
/** Base implementation class for buffered {@link IndexOutput}. */
public abstract class BufferedIndexOutput extends IndexOutput {
@ -28,6 +29,7 @@ public abstract class BufferedIndexOutput extends IndexOutput {
private final byte[] buffer;
private long bufferStart = 0; // position in file of buffer
private int bufferPosition = 0; // position in buffer
private final CRC32 crc = new CRC32();
/**
* Creates a new {@link BufferedIndexOutput} with the default buffer size
@ -75,6 +77,7 @@ public abstract class BufferedIndexOutput extends IndexOutput {
if (bufferPosition > 0)
flush();
// and write data at once
crc.update(b, offset, length);
flushBuffer(b, offset, length);
bufferStart += length;
} else {
@ -99,6 +102,7 @@ public abstract class BufferedIndexOutput extends IndexOutput {
@Override
public void flush() throws IOException {
crc.update(buffer, 0, bufferPosition);
flushBuffer(buffer, bufferPosition);
bufferStart += bufferPosition;
bufferPosition = 0;
@ -141,4 +145,9 @@ public abstract class BufferedIndexOutput extends IndexOutput {
return bufferSize;
}
@Override
public long getChecksum() throws IOException {
flush();
return crc.getValue();
}
}

View File

@ -1,5 +1,7 @@
package org.apache.lucene.store;
import java.io.IOException;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -17,61 +19,24 @@ package org.apache.lucene.store;
* limitations under the License.
*/
import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
/** Reads bytes through to a primary IndexInput, computing
* checksum as it goes. Note that you cannot use seek().
*
* @lucene.internal
/**
* Extension of IndexInput, computing checksum as it goes.
* Callers can retrieve the checksum via {@link #getChecksum()}.
*/
public class ChecksumIndexInput extends IndexInput {
IndexInput main;
Checksum digest;
public abstract class ChecksumIndexInput extends IndexInput {
public ChecksumIndexInput(IndexInput main) {
super("ChecksumIndexInput(" + main + ")");
this.main = main;
digest = new CRC32();
/** resourceDescription should be a non-null, opaque string
* describing this resource; it's returned from
* {@link #toString}. */
protected ChecksumIndexInput(String resourceDescription) {
super(resourceDescription);
}
@Override
public byte readByte() throws IOException {
final byte b = main.readByte();
digest.update(b);
return b;
}
@Override
public void readBytes(byte[] b, int offset, int len)
throws IOException {
main.readBytes(b, offset, len);
digest.update(b, offset, len);
}
public long getChecksum() {
return digest.getValue();
}
@Override
public void close() throws IOException {
main.close();
}
@Override
public long getFilePointer() {
return main.getFilePointer();
}
/** Returns the current checksum value */
public abstract long getChecksum() throws IOException;
@Override
public void seek(long pos) {
throw new UnsupportedOperationException();
}
@Override
public long length() {
return main.length();
}
}

View File

@ -52,14 +52,15 @@ import java.io.IOException;
* </ul>
* <p>Description:</p>
* <ul>
* <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup></li>
* <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup>, Footer</li>
* <li>Compound Entry Table (.cfe) --&gt; Header, FileCount, &lt;FileName,
* DataOffset, DataLength&gt; <sup>FileCount</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>FileCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>DataOffset,DataLength --&gt; {@link DataOutput#writeLong UInt64}</li>
* <li>DataOffset,DataLength,Checksum --&gt; {@link DataOutput#writeLong UInt64}</li>
* <li>FileName --&gt; {@link DataOutput#writeString String}</li>
* <li>FileData --&gt; raw file data</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@ -87,6 +88,7 @@ public final class CompoundFileDirectory extends BaseDirectory {
private static final Map<String,FileEntry> SENTINEL = Collections.emptyMap();
private final CompoundFileWriter writer;
private final IndexInputSlicer handle;
private int version;
/**
* Create a new CompoundFileDirectory.
@ -120,15 +122,15 @@ public final class CompoundFileDirectory extends BaseDirectory {
}
/** Helper method that reads CFS entries from an input stream */
private static final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException {
private final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException {
IOException priorE = null;
IndexInput entriesStream = null;
ChecksumIndexInput entriesStream = null;
try {
final String entriesFileName = IndexFileNames.segmentFileName(
IndexFileNames.stripExtension(name), "",
IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
entriesStream = dir.openInput(entriesFileName, IOContext.READONCE);
CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_START);
entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE);
version = CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
final int numEntries = entriesStream.readVInt();
final Map<String, FileEntry> mapping = new HashMap<>(numEntries);
for (int i = 0; i < numEntries; i++) {
@ -141,8 +143,10 @@ public final class CompoundFileDirectory extends BaseDirectory {
fileEntry.offset = entriesStream.readLong();
fileEntry.length = entriesStream.readLong();
}
if (entriesStream.getFilePointer() != entriesStream.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + entriesFileName + "\": read " + entriesStream.getFilePointer() + " vs size " + entriesStream.length() + " (resource: " + entriesStream + ")");
if (version >= CompoundFileWriter.VERSION_CHECKSUM) {
CodecUtil.checkFooter(entriesStream);
} else {
CodecUtil.checkEOF(entriesStream);
}
return mapping;
} catch (IOException ioe) {

View File

@ -54,7 +54,8 @@ final class CompoundFileWriter implements Closeable{
// versioning for the .cfs file
static final String DATA_CODEC = "CompoundFileWriterData";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int VERSION_CHECKSUM = 1;
static final int VERSION_CURRENT = VERSION_CHECKSUM;
// versioning for the .cfe file
static final String ENTRY_CODEC = "CompoundFileWriterEntries";
@ -140,6 +141,7 @@ final class CompoundFileWriter implements Closeable{
// open the compound stream
getOutput();
assert dataOut != null;
CodecUtil.writeFooter(dataOut);
} catch (IOException e) {
priorException = e;
} finally {
@ -202,6 +204,7 @@ final class CompoundFileWriter implements Closeable{
entryOut.writeLong(fe.offset);
entryOut.writeLong(fe.length);
}
CodecUtil.writeFooter(entryOut);
}
IndexOutput createOutput(String name, IOContext context) throws IOException {
@ -342,6 +345,11 @@ final class CompoundFileWriter implements Closeable{
writtenBytes += length;
delegate.writeBytes(b, offset, length);
}
@Override
public long getChecksum() throws IOException {
return delegate.getChecksum();
}
}
}

View File

@ -102,6 +102,11 @@ public abstract class Directory implements Closeable {
*/
public abstract IndexInput openInput(String name, IOContext context) throws IOException;
/** Returns a stream reading an existing file, computing checksum as it reads */
public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException {
return new BufferedChecksumIndexInput(openInput(name, context));
}
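A hedged sketch of the read-side pattern this helper enables, following the same shape as the SegmentInfos and CompoundFileDirectory changes above; the codec name, version bounds, and fileName are placeholders:

// Sketch: read a file while accumulating its checksum, then validate the stored footer.
ChecksumIndexInput in = dir.openChecksumInput(fileName, IOContext.READONCE);
try {
  CodecUtil.checkHeader(in, "MyCodec", VERSION_START, VERSION_CURRENT);
  // ... read the file's payload here ...
  CodecUtil.checkFooter(in); // compares the accumulated CRC-32 with the footer written by writeFooter
} finally {
  in.close();
}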
/** Construct a {@link Lock}.
* @param name the name of the lock file
*/

View File

@ -43,6 +43,8 @@ public abstract class IndexOutput extends DataOutput implements Closeable {
*/
public abstract long getFilePointer();
/** Returns the current checksum of bytes written so far */
public abstract long getChecksum() throws IOException;
/** The number of bytes in the file. */
public abstract long length() throws IOException;

View File

@ -18,6 +18,8 @@ package org.apache.lucene.store;
*/
import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
/**
* A memory-resident {@link IndexOutput} implementation.
@ -36,6 +38,8 @@ public class RAMOutputStream extends IndexOutput {
private long bufferStart;
private int bufferLength;
private Checksum crc = new BufferedChecksum(new CRC32());
/** Construct an empty output buffer. */
public RAMOutputStream() {
this(new RAMFile());
@ -95,6 +99,7 @@ public class RAMOutputStream extends IndexOutput {
bufferStart = 0;
bufferLength = 0;
file.setLength(0);
crc.reset();
}
@Override
@ -113,12 +118,14 @@ public class RAMOutputStream extends IndexOutput {
currentBufferIndex++;
switchCurrentBuffer();
}
crc.update(b);
currentBuffer[bufferPosition++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int len) throws IOException {
assert b != null;
crc.update(b, offset, len);
while (len > 0) {
if (bufferPosition == bufferLength) {
currentBufferIndex++;
@ -166,4 +173,9 @@ public class RAMOutputStream extends IndexOutput {
public long sizeInBytes() {
return (long) file.numBuffers() * (long) BUFFER_SIZE;
}
@Override
public long getChecksum() throws IOException {
return crc.getValue();
}
}

View File

@ -0,0 +1,90 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Test that a plain default puts CRC32 footers in all files.
*/
public class TestAllFilesHaveChecksumFooter extends LuceneTestCase {
public void test() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
conf.setCodec(new Lucene46Codec());
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
// these fields should sometimes get term vectors, etc
Field idField = newStringField("id", "", Field.Store.NO);
Field bodyField = newTextField("body", "", Field.Store.NO);
Field dvField = new NumericDocValuesField("dv", 5);
doc.add(idField);
doc.add(bodyField);
doc.add(dvField);
for (int i = 0; i < 100; i++) {
idField.setStringValue(Integer.toString(i));
bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
riw.addDocument(doc);
if (random().nextInt(7) == 0) {
riw.commit();
}
if (random().nextInt(20) == 0) {
riw.deleteDocuments(new Term("id", Integer.toString(i)));
}
}
riw.close();
checkHeaders(dir);
dir.close();
}
private void checkHeaders(Directory dir) throws IOException {
for (String file : dir.listAll()) {
if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) {
CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false);
checkHeaders(cfsDir); // recurse into cfs
cfsDir.close();
}
IndexInput in = null;
boolean success = false;
try {
in = dir.openInput(file, newIOContext(random()));
CodecUtil.checksumEntireFile(in);
success = true;
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
}
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@ -39,14 +40,15 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
conf.setCodec(new Lucene46Codec());
// riw should sometimes create docvalues fields, etc
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
// these fields should sometimes get term vectors, etc
Field idField = newStringField("id", "", Field.Store.NO);
Field bodyField = newTextField("body", "", Field.Store.NO);
Field dvField = new NumericDocValuesField("dv", 5);
doc.add(idField);
doc.add(bodyField);
doc.add(dvField);
for (int i = 0; i < 100; i++) {
idField.setStringValue(Integer.toString(i));
bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
@ -54,6 +56,10 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
if (random().nextInt(7) == 0) {
riw.commit();
}
// TODO: we should make a new format with a clean header...
// if (random().nextInt(20) == 0) {
// riw.deleteDocuments(new Term("id", Integer.toString(i)));
// }
}
riw.close();
checkHeaders(dir);

View File

@ -222,7 +222,7 @@ public class TestDoc extends LuceneTestCase {
SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
si, InfoStream.getDefault(), trackingDir,
MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true);
MergeState mergeState = merger.merge();
r1.close();

View File

@ -83,7 +83,7 @@ public class TestSegmentMerger extends LuceneTestCase {
SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(reader1, reader2),
si, InfoStream.getDefault(), mergedDir,
MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()));
MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()), true);
MergeState mergeState = merger.merge();
int docsMerged = mergeState.segmentInfo.getDocCount();
assertTrue(docsMerged == 2);

View File

@ -0,0 +1,68 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import org.apache.lucene.util.LuceneTestCase;
public class TestBufferedChecksum extends LuceneTestCase {
public void testSimple() {
Checksum c = new BufferedChecksum(new CRC32());
c.update(1);
c.update(2);
c.update(3);
assertEquals(1438416925L, c.getValue());
}
public void testRandom() {
Checksum c1 = new CRC32();
Checksum c2 = new BufferedChecksum(new CRC32());
int iterations = atLeast(10000);
for (int i = 0; i < iterations; i++) {
switch(random().nextInt(4)) {
case 0:
// update(byte[], int, int)
int length = random().nextInt(1024);
byte bytes[] = new byte[length];
random().nextBytes(bytes);
c1.update(bytes, 0, bytes.length);
c2.update(bytes, 0, bytes.length);
break;
case 1:
// update(int)
int b = random().nextInt(256);
c1.update(b);
c2.update(b);
break;
case 2:
// reset()
c1.reset();
c2.reset();
break;
case 3:
// getValue()
assertEquals(c1.getValue(), c2.getValue());
break;
}
}
assertEquals(c1.getValue(), c2.getValue());
}
}

Some files were not shown because too many files have changed in this diff.