mirror of https://github.com/apache/lucene.git
LUCENE-9359: Always call checkFooter in SegmentInfos#readCommit. (#1483)
This commit is contained in:
parent
90d84d41b3
commit
bfb6bf9c9a
|
@ -195,6 +195,9 @@ Improvements
|
||||||
* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsColector's numHits
|
* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsColector's numHits
|
||||||
(Tomás Fernández Löbbe)
|
(Tomás Fernández Löbbe)
|
||||||
|
|
||||||
|
* LUCENE-9359: SegmentInfos#readCommit now always returns a
|
||||||
|
CorruptIndexException if the content of the file is invalid. (Adrien Grand)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -304,136 +304,141 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
|
|
||||||
/** Read the commit from the provided {@link ChecksumIndexInput}. */
|
/** Read the commit from the provided {@link ChecksumIndexInput}. */
|
||||||
public static final SegmentInfos readCommit(Directory directory, ChecksumIndexInput input, long generation) throws IOException {
|
public static final SegmentInfos readCommit(Directory directory, ChecksumIndexInput input, long generation) throws IOException {
|
||||||
|
Throwable priorE = null;
|
||||||
// NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
|
try {
|
||||||
// to read the magic ourselves.
|
// NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
|
||||||
int magic = input.readInt();
|
// to read the magic ourselves.
|
||||||
if (magic != CodecUtil.CODEC_MAGIC) {
|
int magic = input.readInt();
|
||||||
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
if (magic != CodecUtil.CODEC_MAGIC) {
|
||||||
}
|
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
||||||
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_70, VERSION_CURRENT);
|
|
||||||
byte id[] = new byte[StringHelper.ID_LENGTH];
|
|
||||||
input.readBytes(id, 0, id.length);
|
|
||||||
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
|
|
||||||
|
|
||||||
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
|
||||||
int indexCreatedVersion = input.readVInt();
|
|
||||||
if (luceneVersion.major < indexCreatedVersion) {
|
|
||||||
throw new CorruptIndexException("Creation version [" + indexCreatedVersion
|
|
||||||
+ ".x] can't be greater than the version that wrote the segment infos: [" + luceneVersion + "]" , input);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (indexCreatedVersion < Version.LATEST.major - 1) {
|
|
||||||
throw new IndexFormatTooOldException(input, "This index was initially created with Lucene "
|
|
||||||
+ indexCreatedVersion + ".x while the current version is " + Version.LATEST
|
|
||||||
+ " and Lucene only supports reading the current and previous major versions.");
|
|
||||||
}
|
|
||||||
|
|
||||||
SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
|
|
||||||
infos.id = id;
|
|
||||||
infos.generation = generation;
|
|
||||||
infos.lastGeneration = generation;
|
|
||||||
infos.luceneVersion = luceneVersion;
|
|
||||||
|
|
||||||
infos.version = input.readLong();
|
|
||||||
//System.out.println("READ sis version=" + infos.version);
|
|
||||||
if (format > VERSION_70) {
|
|
||||||
infos.counter = input.readVLong();
|
|
||||||
} else {
|
|
||||||
infos.counter = input.readInt();
|
|
||||||
}
|
|
||||||
int numSegments = input.readInt();
|
|
||||||
if (numSegments < 0) {
|
|
||||||
throw new CorruptIndexException("invalid segment count: " + numSegments, input);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numSegments > 0) {
|
|
||||||
infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
|
||||||
} else {
|
|
||||||
// else leave as null: no segments
|
|
||||||
}
|
|
||||||
|
|
||||||
long totalDocs = 0;
|
|
||||||
for (int seg = 0; seg < numSegments; seg++) {
|
|
||||||
String segName = input.readString();
|
|
||||||
byte[] segmentID = new byte[StringHelper.ID_LENGTH];
|
|
||||||
input.readBytes(segmentID, 0, segmentID.length);
|
|
||||||
Codec codec = readCodec(input);
|
|
||||||
SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
|
|
||||||
info.setCodec(codec);
|
|
||||||
totalDocs += info.maxDoc();
|
|
||||||
long delGen = input.readLong();
|
|
||||||
int delCount = input.readInt();
|
|
||||||
if (delCount < 0 || delCount > info.maxDoc()) {
|
|
||||||
throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
|
|
||||||
}
|
}
|
||||||
long fieldInfosGen = input.readLong();
|
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_70, VERSION_CURRENT);
|
||||||
long dvGen = input.readLong();
|
byte id[] = new byte[StringHelper.ID_LENGTH];
|
||||||
int softDelCount = format > VERSION_72 ? input.readInt() : 0;
|
input.readBytes(id, 0, id.length);
|
||||||
if (softDelCount < 0 || softDelCount > info.maxDoc()) {
|
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
|
||||||
throw new CorruptIndexException("invalid deletion count: " + softDelCount + " vs maxDoc=" + info.maxDoc(), input);
|
|
||||||
|
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
||||||
|
int indexCreatedVersion = input.readVInt();
|
||||||
|
if (luceneVersion.major < indexCreatedVersion) {
|
||||||
|
throw new CorruptIndexException("Creation version [" + indexCreatedVersion
|
||||||
|
+ ".x] can't be greater than the version that wrote the segment infos: [" + luceneVersion + "]" , input);
|
||||||
}
|
}
|
||||||
if (softDelCount + delCount > info.maxDoc()) {
|
|
||||||
throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input);
|
if (indexCreatedVersion < Version.LATEST.major - 1) {
|
||||||
|
throw new IndexFormatTooOldException(input, "This index was initially created with Lucene "
|
||||||
|
+ indexCreatedVersion + ".x while the current version is " + Version.LATEST
|
||||||
|
+ " and Lucene only supports reading the current and previous major versions.");
|
||||||
}
|
}
|
||||||
final byte[] sciId;
|
|
||||||
if (format > VERSION_74) {
|
SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
|
||||||
byte marker = input.readByte();
|
infos.id = id;
|
||||||
switch (marker) {
|
infos.generation = generation;
|
||||||
case 1:
|
infos.lastGeneration = generation;
|
||||||
sciId = new byte[StringHelper.ID_LENGTH];
|
infos.luceneVersion = luceneVersion;
|
||||||
input.readBytes(sciId, 0, sciId.length);
|
|
||||||
break;
|
infos.version = input.readLong();
|
||||||
case 0:
|
//System.out.println("READ sis version=" + infos.version);
|
||||||
sciId = null;
|
if (format > VERSION_70) {
|
||||||
break;
|
infos.counter = input.readVLong();
|
||||||
default:
|
|
||||||
throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
sciId = null;
|
infos.counter = input.readInt();
|
||||||
}
|
}
|
||||||
SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId);
|
int numSegments = input.readInt();
|
||||||
siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
|
if (numSegments < 0) {
|
||||||
final Map<Integer,Set<String>> dvUpdateFiles;
|
throw new CorruptIndexException("invalid segment count: " + numSegments, input);
|
||||||
final int numDVFields = input.readInt();
|
}
|
||||||
if (numDVFields == 0) {
|
|
||||||
dvUpdateFiles = Collections.emptyMap();
|
if (numSegments > 0) {
|
||||||
|
infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
||||||
} else {
|
} else {
|
||||||
Map<Integer,Set<String>> map = new HashMap<>(numDVFields);
|
// else leave as null: no segments
|
||||||
for (int i = 0; i < numDVFields; i++) {
|
}
|
||||||
map.put(input.readInt(), input.readSetOfStrings());
|
|
||||||
|
long totalDocs = 0;
|
||||||
|
for (int seg = 0; seg < numSegments; seg++) {
|
||||||
|
String segName = input.readString();
|
||||||
|
byte[] segmentID = new byte[StringHelper.ID_LENGTH];
|
||||||
|
input.readBytes(segmentID, 0, segmentID.length);
|
||||||
|
Codec codec = readCodec(input);
|
||||||
|
SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
|
||||||
|
info.setCodec(codec);
|
||||||
|
totalDocs += info.maxDoc();
|
||||||
|
long delGen = input.readLong();
|
||||||
|
int delCount = input.readInt();
|
||||||
|
if (delCount < 0 || delCount > info.maxDoc()) {
|
||||||
|
throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
|
||||||
}
|
}
|
||||||
dvUpdateFiles = Collections.unmodifiableMap(map);
|
long fieldInfosGen = input.readLong();
|
||||||
}
|
long dvGen = input.readLong();
|
||||||
siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
|
int softDelCount = format > VERSION_72 ? input.readInt() : 0;
|
||||||
infos.add(siPerCommit);
|
if (softDelCount < 0 || softDelCount > info.maxDoc()) {
|
||||||
|
throw new CorruptIndexException("invalid deletion count: " + softDelCount + " vs maxDoc=" + info.maxDoc(), input);
|
||||||
|
}
|
||||||
|
if (softDelCount + delCount > info.maxDoc()) {
|
||||||
|
throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input);
|
||||||
|
}
|
||||||
|
final byte[] sciId;
|
||||||
|
if (format > VERSION_74) {
|
||||||
|
byte marker = input.readByte();
|
||||||
|
switch (marker) {
|
||||||
|
case 1:
|
||||||
|
sciId = new byte[StringHelper.ID_LENGTH];
|
||||||
|
input.readBytes(sciId, 0, sciId.length);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
sciId = null;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sciId = null;
|
||||||
|
}
|
||||||
|
SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId);
|
||||||
|
siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
|
||||||
|
final Map<Integer,Set<String>> dvUpdateFiles;
|
||||||
|
final int numDVFields = input.readInt();
|
||||||
|
if (numDVFields == 0) {
|
||||||
|
dvUpdateFiles = Collections.emptyMap();
|
||||||
|
} else {
|
||||||
|
Map<Integer,Set<String>> map = new HashMap<>(numDVFields);
|
||||||
|
for (int i = 0; i < numDVFields; i++) {
|
||||||
|
map.put(input.readInt(), input.readSetOfStrings());
|
||||||
|
}
|
||||||
|
dvUpdateFiles = Collections.unmodifiableMap(map);
|
||||||
|
}
|
||||||
|
siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
|
||||||
|
infos.add(siPerCommit);
|
||||||
|
|
||||||
Version segmentVersion = info.getVersion();
|
Version segmentVersion = info.getVersion();
|
||||||
|
|
||||||
if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
|
if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
|
||||||
throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
|
throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) {
|
||||||
|
throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) {
|
||||||
|
throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) {
|
infos.userData = input.readMapOfStrings();
|
||||||
throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input);
|
|
||||||
|
// LUCENE-6299: check we are in bounds
|
||||||
|
if (totalDocs > IndexWriter.getActualMaxDocs()) {
|
||||||
|
throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) {
|
return infos;
|
||||||
throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input);
|
} catch (Throwable t) {
|
||||||
}
|
priorE = t;
|
||||||
|
} finally {
|
||||||
|
CodecUtil.checkFooter(input, priorE);
|
||||||
}
|
}
|
||||||
|
throw new Error("Unreachable code");
|
||||||
infos.userData = input.readMapOfStrings();
|
|
||||||
|
|
||||||
CodecUtil.checkFooter(input);
|
|
||||||
|
|
||||||
// LUCENE-6299: check we are in bounds
|
|
||||||
if (totalDocs > IndexWriter.getActualMaxDocs()) {
|
|
||||||
throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
|
|
||||||
}
|
|
||||||
|
|
||||||
return infos;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Codec readCodec(DataInput input) throws IOException {
|
private static Codec readCodec(DataInput input) throws IOException {
|
||||||
|
|
|
@ -18,12 +18,16 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -178,5 +182,62 @@ public class TestSegmentInfos extends LuceneTestCase {
|
||||||
assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId()));
|
assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBitFlippedTriggersCorruptIndexException() throws IOException {
|
||||||
|
BaseDirectoryWrapper dir = newDirectory();
|
||||||
|
dir.setCheckIndexOnClose(false);
|
||||||
|
byte id[] = StringHelper.randomId();
|
||||||
|
Codec codec = Codec.getDefault();
|
||||||
|
|
||||||
|
SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
|
||||||
|
SegmentInfo info = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, Codec.getDefault(),
|
||||||
|
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
|
||||||
|
info.setFiles(Collections.<String>emptySet());
|
||||||
|
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
|
||||||
|
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
|
||||||
|
sis.add(commitInfo);
|
||||||
|
|
||||||
|
info = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_1", 1, false, Codec.getDefault(),
|
||||||
|
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
|
||||||
|
info.setFiles(Collections.<String>emptySet());
|
||||||
|
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
|
||||||
|
commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1, StringHelper.randomId());
|
||||||
|
sis.add(commitInfo);
|
||||||
|
|
||||||
|
sis.commit(dir);
|
||||||
|
|
||||||
|
BaseDirectoryWrapper corruptDir = newDirectory();
|
||||||
|
corruptDir.setCheckIndexOnClose(false);
|
||||||
|
boolean corrupt = false;
|
||||||
|
for (String file : dir.listAll()) {
|
||||||
|
if (file.startsWith(IndexFileNames.SEGMENTS)) {
|
||||||
|
try (IndexInput in = dir.openInput(file, IOContext.DEFAULT);
|
||||||
|
IndexOutput out = corruptDir.createOutput(file, IOContext.DEFAULT)) {
|
||||||
|
final long corruptIndex = TestUtil.nextLong(random(), 0, in.length() - 1);
|
||||||
|
out.copyBytes(in, corruptIndex);
|
||||||
|
final int b = Byte.toUnsignedInt(in.readByte()) + TestUtil.nextInt(random(), 0x01, 0xff);
|
||||||
|
out.writeByte((byte) b);
|
||||||
|
out.copyBytes(in, in.length() - in.getFilePointer());
|
||||||
|
}
|
||||||
|
try (IndexInput in = corruptDir.openInput(file, IOContext.DEFAULT)) {
|
||||||
|
CodecUtil.checksumEntireFile(in);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: Altering the file did not update the checksum, aborting...");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} catch (CorruptIndexException e) {
|
||||||
|
// ok
|
||||||
|
}
|
||||||
|
corrupt = true;
|
||||||
|
} else if (slowFileExists(corruptDir, file) == false) { // extraFS
|
||||||
|
corruptDir.copyFrom(dir, file, file, IOContext.DEFAULT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertTrue("No segments file found", corrupt);
|
||||||
|
|
||||||
|
expectThrows(CorruptIndexException.class, () -> SegmentInfos.readLatestCommit(corruptDir));
|
||||||
|
dir.close();
|
||||||
|
corruptDir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue