LUCENE-4196: add missing checks when reading up-front metadata

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1379443 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-08-31 13:18:22 +00:00
parent 5d6583162d
commit 63a95f1175
19 changed files with 103 additions and 31 deletions
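
All of the new checks follow the same pattern: metadata read up front from the terms dictionary, terms index, and segment files is validated explicitly, and a CorruptIndexException is thrown on nonsense values instead of relying on asserts (which are disabled in normal production runs). Below is a minimal standalone sketch of the per-field invariants being enforced; the helper class and method names are made up for illustration, only the checks themselves mirror the commit.

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.IndexInput;

// Sketch of the metadata sanity checks added in this commit:
//   0 <= docCount <= maxDoc                   (#docs with the field cannot exceed #docs in the segment)
//   sumDocFreq >= docCount                    (#postings must be >= #docs with the field)
//   sumTotalTermFreq == -1 or >= sumDocFreq   (#positions must be >= #postings; -1 means freqs are omitted)
final class FieldStatsChecks {
  static void check(long maxDoc, int docCount, long sumDocFreq, long sumTotalTermFreq,
                    IndexInput in) throws CorruptIndexException {
    if (docCount < 0 || docCount > maxDoc) {
      throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + maxDoc + " (resource=" + in + ")");
    }
    if (sumDocFreq < docCount) {
      throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
    }
    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) {
      throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
    }
  }
}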

BlockTermsReader.java

@@ -23,12 +23,14 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -104,7 +106,7 @@ public class BlockTermsReader extends FieldsProducer {
// private String segment;
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, IOContext context,
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context,
int termsCacheSize, String segmentSuffix)
throws IOException {
@@ -112,7 +114,7 @@ public class BlockTermsReader extends FieldsProducer {
termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
// this.segment = segment;
in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
in = dir.openInput(IndexFileNames.segmentFileName(info.name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
context);
boolean success = false;
@@ -126,6 +128,9 @@ public class BlockTermsReader extends FieldsProducer {
seekDir(in, dirOffset);
final int numFields = in.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("invalid number of fields: " + numFields + " (resource=" + in + ")");
}
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
final long numTerms = in.readVLong();
@@ -135,8 +140,19 @@ public class BlockTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
assert !fields.containsKey(fieldInfo.name);
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
}
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
}
success = true;
} finally {
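
Note how the duplicate-field detection above no longer needs a containsKey assert: it relies on Map.put returning the value previously mapped to the key. A tiny self-contained illustration of that idiom with plain JDK collections follows; all names in it are invented for the demo.

import java.util.HashMap;
import java.util.Map;

// Demo of the put-and-inspect-previous idiom used by the reader above:
// Map.put returns the old value for the key, or null if the key was absent.
public class DuplicateFieldDemo {
  public static void main(String[] args) {
    Map<String, Long> fields = new HashMap<>();
    Long previous = fields.put("body", 42L);
    System.out.println(previous);            // prints: null (first time the field is seen)
    previous = fields.put("body", 7L);
    if (previous != null) {
      // in the readers this becomes a CorruptIndexException, because a field
      // listed twice in the on-disk directory indicates a corrupt file
      System.out.println("duplicate field detected: body");
    }
  }
}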

BlockTreeTermsReader.java

@@ -27,12 +27,14 @@ import java.util.Iterator;
import java.util.Locale;
import java.util.TreeMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -103,14 +105,14 @@ public class BlockTreeTermsReader extends FieldsProducer {
private String segment;
public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, String segment,
public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
PostingsReaderBase postingsReader, IOContext ioContext,
String segmentSuffix, int indexDivisor)
throws IOException {
this.postingsReader = postingsReader;
this.segment = segment;
this.segment = info.name;
in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION),
ioContext);
@@ -135,6 +137,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
final int numFields = in.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
}
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
@@ -149,9 +154,20 @@ public class BlockTreeTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
}
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
assert !fields.containsKey(fieldInfo.name);
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
}
success = true;
} finally {

FixedGapTermsIndexReader.java

@@ -20,6 +20,7 @@ package org.apache.lucene.codecs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;
@@ -85,6 +86,9 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
readHeader(in);
indexInterval = in.readInt();
if (indexInterval < 1) {
throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
}
this.indexDivisor = indexDivisor;
if (indexDivisor < 0) {
@@ -99,17 +103,28 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// Read directory
final int numFields = in.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
}
//System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
final int numIndexTerms = in.readVInt();
if (numIndexTerms < 0) {
throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
}
final long termsStart = in.readVLong();
final long indexStart = in.readVLong();
final long packedIndexStart = in.readVLong();
final long packedOffsetsStart = in.readVLong();
assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
if (packedIndexStart < indexStart) {
throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + " numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
}
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
}
success = true;
} finally {

VariableGapTermsIndexReader.java

@@ -23,6 +23,7 @@ import java.io.OutputStreamWriter; // for toDot
import java.io.Writer; // for toDot
import java.util.HashMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -71,12 +72,18 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
// Read directory
final int numFields = in.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
}
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
final long indexStart = in.readVLong();
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
fields.put(fieldInfo, new FieldIndexData(fieldInfo, indexStart));
FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, indexStart));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
}
success = true;
} finally {

AppendingPostingsFormat.java

@@ -64,7 +64,7 @@ class AppendingPostingsFormat extends PostingsFormat {
FieldsProducer ret = new AppendingTermsReader(
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postings,
state.context,
state.segmentSuffix,

AppendingTermsReader.java

@@ -23,6 +23,7 @@ import org.apache.lucene.codecs.BlockTreeTermsReader;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -33,9 +34,9 @@ import org.apache.lucene.store.IndexInput;
*/
public class AppendingTermsReader extends BlockTreeTermsReader {
public AppendingTermsReader(Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader,
public AppendingTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader,
IOContext ioContext, String segmentSuffix, int indexDivisor) throws IOException {
super(dir, fieldInfos, segment, postingsReader, ioContext, segmentSuffix, indexDivisor);
super(dir, fieldInfos, info, postingsReader, ioContext, segmentSuffix, indexDivisor);
}
@Override

BlockPostingsFormat.java

@@ -406,7 +406,7 @@ public final class BlockPostingsFormat extends PostingsFormat {
try {
FieldsProducer ret = new BlockTreeTermsReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postingsReader,
state.context,
state.segmentSuffix,

Lucene40PostingsFormat.java

@@ -314,7 +314,7 @@ public class Lucene40PostingsFormat extends PostingsFormat {
FieldsProducer ret = new BlockTreeTermsReader(
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postings,
state.context,
state.segmentSuffix,

Lucene40SegmentInfoReader.java

@@ -24,6 +24,7 @@ import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
@@ -50,11 +51,18 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader {
Lucene40SegmentInfoFormat.VERSION_CURRENT);
final String version = input.readString();
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount + " (resource=" + input + ")");
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Map<String,String> attributes = input.readStringStringMap();
final Set<String> files = input.readStringSet();
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile,
null, diagnostics, Collections.unmodifiableMap(attributes));
si.setFiles(files);
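
The file-pointer check above catches both truncated files and files with trailing garbage. A minimal sketch of the same end-of-file guard as a reusable helper is shown below; the class and method names are invented, while getFilePointer and length are the IndexInput accessors actually used in the commit.

import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.IndexInput;

// Sketch: once everything expected has been read, the file pointer must sit
// exactly at the end of the file; any mismatch is treated as corruption.
final class TrailingBytesCheck {
  static void checkFullyConsumed(IndexInput input, String fileName) throws IOException {
    if (input.getFilePointer() != input.length()) {
      throw new CorruptIndexException("did not read all bytes from file \"" + fileName
          + "\": read " + input.getFilePointer() + " vs size " + input.length()
          + " (resource: " + input + ")");
    }
  }
}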

PulsingPostingsFormat.java

@@ -100,7 +100,7 @@ public abstract class PulsingPostingsFormat extends PostingsFormat {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(docsReader);
FieldsProducer ret = new BlockTreeTermsReader(
state.dir, state.fieldInfos, state.segmentInfo.name,
state.dir, state.fieldInfos, state.segmentInfo,
pulsingReader,
state.context,
state.segmentSuffix,

FieldInfos.java

@@ -57,10 +57,14 @@ public class FieldInfos implements Iterable<FieldInfo> {
boolean hasDocValues = false;
for (FieldInfo info : infos) {
assert !byNumber.containsKey(info.number);
byNumber.put(info.number, info);
assert !byName.containsKey(info.name);
byName.put(info.name, info);
FieldInfo previous = byNumber.put(info.number, info);
if (previous != null) {
throw new IllegalArgumentException("duplicate field numbers: " + previous.name + " and " + info.name + " have: " + info.number);
}
previous = byName.put(info.name, info);
if (previous != null) {
throw new IllegalArgumentException("duplicate field names: " + previous.number + " and " + info.number + " have: " + info.name);
}
hasVectors |= info.hasVectors();
hasProx |= info.isIndexed() && info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;

SegmentInfos.java

@@ -282,6 +282,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
version = input.readLong();
counter = input.readInt();
int numSegments = input.readInt();
if (numSegments < 0) {
throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")");
}
for(int seg=0;seg<numSegments;seg++) {
String segName = input.readString();
Codec codec = Codec.forName(input.readString());
@@ -290,7 +293,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
info.setCodec(codec);
long delGen = input.readLong();
int delCount = input.readInt();
assert delCount <= info.getDocCount();
if (delCount < 0 || delCount > info.getDocCount()) {
throw new CorruptIndexException("invalid deletion count: " + delCount + " (resource: " + input + ")");
}
add(new SegmentInfoPerCommit(info, delCount, delGen));
}
userData = input.readStringStringMap();

CollectionStatistics.java

@@ -34,7 +34,7 @@ public class CollectionStatistics {
public CollectionStatistics(String field, long maxDoc, long docCount, long sumTotalTermFreq, long sumDocFreq) {
assert maxDoc >= 0;
assert docCount >= -1 && docCount <= maxDoc; // #docs with field must be <= #docs
assert sumDocFreq >= -1;
assert sumDocFreq == -1 || sumDocFreq >= docCount; // #postings must be >= #docs with field
assert sumTotalTermFreq == -1 || sumTotalTermFreq >= sumDocFreq; // #positions must be >= #postings
this.field = field;
this.maxDoc = maxDoc;
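
The CollectionStatistics change above tightens the old "sumDocFreq >= -1" assert into the same ordering the readers now verify on disk. A small worked example with concrete (invented) numbers that satisfy the whole chain follows; run with -ea for the asserts to be evaluated.

import org.apache.lucene.search.CollectionStatistics;

// Example: a 100-doc segment where 80 docs contain the field "body", producing
// 400 postings (sumDocFreq) and 5000 term occurrences (sumTotalTermFreq).
// Chain: sumTotalTermFreq (5000) >= sumDocFreq (400) >= docCount (80) <= maxDoc (100).
public class CollectionStatisticsExample {
  public static void main(String[] args) {
    CollectionStatistics stats =
        new CollectionStatistics("body", 100L, 80L, 5000L, 400L);
    System.out.println(stats.field() + ": sumDocFreq=" + stats.sumDocFreq());
  }
}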

Lucene40WithOrds.java

@@ -114,7 +114,7 @@ public class Lucene40WithOrds extends PostingsFormat {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postings,
state.context,
TERMS_CACHE_SIZE,

MockFixedIntBlockPostingsFormat.java

@@ -183,7 +183,7 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postingsReader,
state.context,
1024,

MockVariableIntBlockPostingsFormat.java

@@ -207,7 +207,7 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postingsReader,
state.context,
1024,

MockRandomPostingsFormat.java

@@ -328,7 +328,7 @@ public class MockRandomPostingsFormat extends PostingsFormat {
try {
fields = new BlockTreeTermsReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postingsReader,
state.context,
state.segmentSuffix,
@@ -398,7 +398,7 @@ public class MockRandomPostingsFormat extends PostingsFormat {
fields = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postingsReader,
state.context,
termsCacheSize,

MockSepPostingsFormat.java

@@ -107,7 +107,7 @@ public class MockSepPostingsFormat extends PostingsFormat {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.segmentInfo,
postingsReader,
state.context,
1024,

NestedPulsingPostingsFormat.java

@@ -81,7 +81,7 @@ public class NestedPulsingPostingsFormat extends PostingsFormat {
pulsingReaderInner = new PulsingPostingsReader(docsReader);
pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
FieldsProducer ret = new BlockTreeTermsReader(
state.dir, state.fieldInfos, state.segmentInfo.name,
state.dir, state.fieldInfos, state.segmentInfo,
pulsingReader,
state.context,
state.segmentSuffix,