diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
index 14505698cb7..7d1eecea250 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
@@ -23,12 +23,14 @@ import java.util.Comparator;
 import java.util.Iterator;
 import java.util.TreeMap;
 
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -104,7 +106,7 @@ public class BlockTermsReader extends FieldsProducer {
 
   // private String segment;
 
-  public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, IOContext context,
+  public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context,
                           int termsCacheSize, String segmentSuffix)
     throws IOException {
 
@@ -112,7 +114,7 @@ public class BlockTermsReader extends FieldsProducer {
     termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
 
     // this.segment = segment;
-    in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
+    in = dir.openInput(IndexFileNames.segmentFileName(info.name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
                        context);
 
     boolean success = false;
@@ -126,6 +128,9 @@ public class BlockTermsReader extends FieldsProducer {
       seekDir(in, dirOffset);
 
       final int numFields = in.readVInt();
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid number of fields: " + numFields + " (resource=" + in + ")");
+      }
       for(int i=0;i<numFields;i++) {
         final int field = in.readVInt();
         final long numTerms = in.readVLong();
         final long termsStartPointer = in.readVLong();
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
         final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
-        assert !fields.containsKey(fieldInfo.name);
-        fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
+        if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
+          throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+        }
+        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
+          throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
+        }
+        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+          throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
+        }
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
+        }
       }
       success = true;
     } finally {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
index 75a32c9c0f2..f79ee5fb175 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@@ -27,12 +27,14 @@ import java.util.Iterator;
 import java.util.Locale;
 import java.util.TreeMap;
 
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -103,14 +105,14 @@ public class BlockTreeTermsReader extends FieldsProducer {
 
   private String segment;
 
-  public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, String segment,
+  public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
                               PostingsReaderBase postingsReader, IOContext ioContext,
                               String segmentSuffix, int indexDivisor)
     throws IOException {
 
     this.postingsReader = postingsReader;
-    this.segment = segment;
+    this.segment = info.name;
 
     in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION),
                        ioContext);
@@ -135,6 +137,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
       }
 
       final int numFields = in.readVInt();
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+      }
       for(int i=0;i<numFields;i++) {
         final int field = in.readVInt();
         final long numTerms = in.readVLong();
         final int numBytes = in.readVInt();
         final BytesRef rootCode = new BytesRef(new byte[numBytes]);
         in.readBytes(rootCode.bytes, 0, numBytes);
         rootCode.length = numBytes;
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
         final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
+        if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
+          throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+        }
+        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
+          throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
+        }
+        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+          throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
+        }
         final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
-        assert !fields.containsKey(fieldInfo.name);
-        fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+        }
       }
       success = true;
     } finally {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
index 15f85189001..c655b939e0d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.util.BytesRef;
@@ -85,6 +86,9 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
       readHeader(in);
       indexInterval = in.readInt();
+      if (indexInterval < 1) {
+        throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
+      }
       this.indexDivisor = indexDivisor;
 
       if (indexDivisor < 0) {
@@ -98,18 +102,29 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
         seekDir(in, dirOffset);
 
         // Read directory
-        final int numFields = in.readVInt();
+        final int numFields = in.readVInt();
+        if (numFields < 0) {
+          throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+        }
         //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
         for(int i=0;i<numFields;i++) {
           final int field = in.readVInt();
           final int numIndexTerms = in.readVInt();
           final long termsStart = in.readVLong();
           final long indexStart = in.readVLong();
          final long packedIndexStart = in.readVLong();
          final long packedOffsetsStart = in.readVLong();
-          assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
+          if (packedIndexStart < indexStart) {
+            throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
+          }
           final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-          fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
+          FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
+          if (previous != null) {
+            throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+          }
         }
         success = true;
       } finally {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
index e418e1bb5b0..6a783491f81 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
@@ -23,6 +23,7 @@ import java.io.OutputStreamWriter; // for toDot
 import java.io.Writer; // for toDot
 import java.util.HashMap;
 
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
@@ -71,12 +72,18 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
 
       // Read directory
       final int numFields = in.readVInt();
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+      }
       for(int i=0;i<numFields;i++) {
         final int field = in.readVInt();
         final long indexStart = in.readVLong();
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-        fields.put(fieldInfo, new FieldIndexData(fieldInfo, indexStart));
+        FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, indexStart));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+        }
       }
       success = true;
     } finally {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
     final Map<String,String> diagnostics = input.readStringStringMap();
     final Map<String,String> attributes = input.readStringStringMap();
     final Set<String> files = input.readStringSet();
+
+    if (input.getFilePointer() != input.length()) {
+      throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+    }
 
     final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes));
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
index ceb9e841d9a..da53cc52009 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
@@ -100,7 +100,7 @@ public abstract class PulsingPostingsFormat extends PostingsFormat {
       docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
       pulsingReader = new PulsingPostingsReader(docsReader);
       FieldsProducer ret = new BlockTreeTermsReader(
-                                                    state.dir, state.fieldInfos, state.segmentInfo.name,
+                                                    state.dir, state.fieldInfos, state.segmentInfo,
                                                     pulsingReader,
                                                     state.context,
                                                     state.segmentSuffix,
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
index 33243a83c75..50df32f1473 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
@@ -57,10 +57,14 @@ public class FieldInfos implements Iterable<FieldInfo> {
     boolean hasDocValues = false;
 
     for (FieldInfo info : infos) {
-      assert !byNumber.containsKey(info.number);
-      byNumber.put(info.number, info);
-      assert !byName.containsKey(info.name);
-      byName.put(info.name, info);
+      FieldInfo previous = byNumber.put(info.number, info);
+      if (previous != null) {
+        throw new IllegalArgumentException("duplicate field numbers: " + previous.name + " and " + info.name + " have: " + info.number);
+      }
+      previous = byName.put(info.name, info);
+      if (previous != null) {
+        throw new IllegalArgumentException("duplicate field names: " + previous.number + " and " + info.number + " have: " + info.name);
+      }
       hasVectors |= info.hasVectors();
       hasProx |= info.isIndexed() && info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
index f1b938e3dab..3ecefb51eb9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -282,6 +282,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCommit> {
         info.setCodec(codec);
         long delGen = input.readLong();
         int delCount = input.readInt();
+        if (delCount < 0 || delCount > info.getDocCount()) {
+          throw new CorruptIndexException("invalid deletion count: " + delCount + " (resource: " + input + ")");
+        }
         add(new SegmentInfoPerCommit(info, delCount, delGen));
       }
       userData = input.readStringStringMap();
diff --git a/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java b/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java
index 09cf54c6e40..0b2a3289925 100644
--- a/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java
+++ b/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java
@@ -34,7 +34,7 @@ public class CollectionStatistics {
   public CollectionStatistics(String field, long maxDoc, long docCount, long sumTotalTermFreq, long sumDocFreq) {
     assert maxDoc >= 0;
     assert docCount >= -1 && docCount <= maxDoc; // #docs with field must be <= #docs
-    assert sumDocFreq >= -1;
+    assert sumDocFreq == -1 || sumDocFreq >= docCount; // #postings must be >= #docs with field
     assert sumTotalTermFreq == -1 || sumTotalTermFreq >= sumDocFreq; // #positions must be >= #postings
     this.field = field;
     this.maxDoc = maxDoc;
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java
index 38c0dcc3246..dbb76ebc787 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java
@@ -114,7 +114,7 @@ public class Lucene40WithOrds extends PostingsFormat {
       FieldsProducer ret = new BlockTermsReader(indexReader,
                                                 state.dir,
                                                 state.fieldInfos,
-                                                state.segmentInfo.name,
+                                                state.segmentInfo,
                                                 postings,
                                                 state.context,
                                                 TERMS_CACHE_SIZE,
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
index 468e1b51b73..f24947191a1 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
@@ -183,7 +183,7 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat {
     FieldsProducer ret = new BlockTermsReader(indexReader,
                                               state.dir,
                                               state.fieldInfos,
-                                              state.segmentInfo.name,
+                                              state.segmentInfo,
                                               postingsReader,
                                               state.context,
                                               1024,
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
index 1a38cb34e0d..f5679cc2f5d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
@@ -207,7 +207,7 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat {
     FieldsProducer ret = new BlockTermsReader(indexReader,
                                               state.dir,
                                               state.fieldInfos,
-                                              state.segmentInfo.name,
+                                              state.segmentInfo,
                                               postingsReader,
                                               state.context,
                                               1024,
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
index afb836515d3..208975345bb 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
@@ -328,7 +328,7 @@ public class MockRandomPostingsFormat extends PostingsFormat {
       try {
         fields = new BlockTreeTermsReader(state.dir,
                                           state.fieldInfos,
-                                          state.segmentInfo.name,
+                                          state.segmentInfo,
                                           postingsReader,
                                           state.context,
                                           state.segmentSuffix,
@@ -398,7 +398,7 @@ public class MockRandomPostingsFormat extends PostingsFormat {
       fields = new BlockTermsReader(indexReader,
                                     state.dir,
                                     state.fieldInfos,
-                                    state.segmentInfo.name,
+                                    state.segmentInfo,
                                     postingsReader,
                                     state.context,
                                     termsCacheSize,
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java
index 45ce698ea08..220bd398804 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java
@@ -107,7 +107,7 @@ public class MockSepPostingsFormat extends PostingsFormat {
     FieldsProducer ret = new BlockTermsReader(indexReader,
                                               state.dir,
                                               state.fieldInfos,
-                                              state.segmentInfo.name,
+                                              state.segmentInfo,
                                               postingsReader,
                                               state.context,
                                               1024,
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
index 9cbe7090aa1..69bbbc0ef90 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
@@ -81,7 +81,7 @@ public class NestedPulsingPostingsFormat extends PostingsFormat {
       pulsingReaderInner = new PulsingPostingsReader(docsReader);
      pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
       FieldsProducer ret = new BlockTreeTermsReader(
-                                                    state.dir, state.fieldInfos, state.segmentInfo.name,
+                                                    state.dir, state.fieldInfos, state.segmentInfo,
                                                     pulsingReader,
                                                     state.context,
                                                     state.segmentSuffix,
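The sketch below is not part of the patch; it is a minimal, self-contained illustration of the validation pattern the diff applies: replace asserts over on-disk metadata with hard checks against the segment's document count, and detect duplicate field entries from the return value of Map.put. The FieldStatsValidator and FieldStats names are hypothetical stand-ins, not Lucene APIs.

```java
// Illustrative sketch only. Mirrors the checks added in the patch:
//   docCount <= maxDoc, sumDocFreq >= docCount, sumTotalTermFreq >= sumDocFreq,
// and duplicate detection via the previous value returned by Map.put.
import java.util.HashMap;
import java.util.Map;

public class FieldStatsValidator {

  /** Simplified stand-in for the per-field statistics a terms dictionary reads. */
  public static final class FieldStats {
    final long sumTotalTermFreq; // -1 when total term frequencies are not tracked
    final long sumDocFreq;
    final int docCount;

    FieldStats(long sumTotalTermFreq, long sumDocFreq, int docCount) {
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
    }
  }

  private final Map<String, FieldStats> fields = new HashMap<>();

  /** Throws instead of asserting, so corrupt metadata is caught even with assertions disabled. */
  public void addField(String name, FieldStats stats, int maxDoc) {
    if (stats.docCount < 0 || stats.docCount > maxDoc) {               // #docs with field must be <= #docs
      throw new IllegalStateException("invalid docCount: " + stats.docCount + " maxDoc: " + maxDoc);
    }
    if (stats.sumDocFreq < stats.docCount) {                           // #postings must be >= #docs with field
      throw new IllegalStateException("invalid sumDocFreq: " + stats.sumDocFreq + " docCount: " + stats.docCount);
    }
    if (stats.sumTotalTermFreq != -1 && stats.sumTotalTermFreq < stats.sumDocFreq) { // #positions must be >= #postings
      throw new IllegalStateException("invalid sumTotalTermFreq: " + stats.sumTotalTermFreq + " sumDocFreq: " + stats.sumDocFreq);
    }
    FieldStats previous = fields.put(name, stats);                     // put() returns the old value on a duplicate
    if (previous != null) {
      throw new IllegalStateException("duplicate field: " + name);
    }
  }

  public static void main(String[] args) {
    FieldStatsValidator validator = new FieldStatsValidator();
    validator.addField("body", new FieldStats(120, 100, 40), 50);      // consistent stats: accepted
    try {
      validator.addField("body", new FieldStats(120, 100, 40), 50);    // same field again
    } catch (IllegalStateException expected) {
      System.out.println("caught: " + expected.getMessage());          // duplicate field: body
    }
  }
}
```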