From fe07d9dd0e343754a772e90dce33a17f5cbb19d3 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 29 May 2020 15:45:51 +0200 Subject: [PATCH] Revert "LUCENE-9359: Always call checkFooter in SegmentInfos#readCommit. (#1483)" This reverts commit bfb6bf9c9aafc778a88000e87f082d82dba9872c. --- lucene/CHANGES.txt | 3 - .../org/apache/lucene/index/SegmentInfos.java | 261 +++++++++--------- .../apache/lucene/index/TestSegmentInfos.java | 61 ---- 3 files changed, 128 insertions(+), 197 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 50b7f7b6f95..cd42f6e2abd 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -195,9 +195,6 @@ Improvements * LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsColector's numHits (Tomás Fernández Löbbe) -* LUCENE-9359: SegmentInfos#readCommit now always returns a - CorruptIndexException if the content of the file is invalid. (Adrien Grand) - Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 5475fbd109d..f9edccd46f5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -304,141 +304,136 @@ public final class SegmentInfos implements Cloneable, Iterable 0) { - infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); - } else { - // else leave as null: no segments - } - - long totalDocs = 0; - for (int seg = 0; seg < numSegments; seg++) { - String segName = input.readString(); - byte[] segmentID = new byte[StringHelper.ID_LENGTH]; - input.readBytes(segmentID, 0, segmentID.length); - Codec codec = readCodec(input); - SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ); - info.setCodec(codec); - totalDocs += info.maxDoc(); - long delGen = input.readLong(); - int delCount = input.readInt(); - if (delCount < 0 || delCount > info.maxDoc()) { - throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input); - } - long fieldInfosGen = input.readLong(); - long dvGen = input.readLong(); - int softDelCount = format > VERSION_72 ? input.readInt() : 0; - if (softDelCount < 0 || softDelCount > info.maxDoc()) { - throw new CorruptIndexException("invalid deletion count: " + softDelCount + " vs maxDoc=" + info.maxDoc(), input); - } - if (softDelCount + delCount > info.maxDoc()) { - throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input); - } - final byte[] sciId; - if (format > VERSION_74) { - byte marker = input.readByte(); - switch (marker) { - case 1: - sciId = new byte[StringHelper.ID_LENGTH]; - input.readBytes(sciId, 0, sciId.length); - break; - case 0: - sciId = null; - break; - default: - throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input); - } - } else { - sciId = null; - } - SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId); - siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); - final Map> dvUpdateFiles; - final int numDVFields = input.readInt(); - if (numDVFields == 0) { - dvUpdateFiles = Collections.emptyMap(); - } else { - Map> map = new HashMap<>(numDVFields); - for (int i = 0; i < numDVFields; i++) { - map.put(input.readInt(), input.readSetOfStrings()); - } - dvUpdateFiles = Collections.unmodifiableMap(map); - } - siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); - infos.add(siPerCommit); - - Version segmentVersion = info.getVersion(); - - if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { - throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input); - } - - if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) { - throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input); - } - - if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) { - throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input); - } - } - - infos.userData = input.readMapOfStrings(); - - // LUCENE-6299: check we are in bounds - if (totalDocs > IndexWriter.getActualMaxDocs()) { - throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); - } - - return infos; - } catch (Throwable t) { - priorE = t; - } finally { - CodecUtil.checkFooter(input, priorE); + // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need + // to read the magic ourselves. + int magic = input.readInt(); + if (magic != CodecUtil.CODEC_MAGIC) { + throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } - throw new Error("Unreachable code"); + int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_70, VERSION_CURRENT); + byte id[] = new byte[StringHelper.ID_LENGTH]; + input.readBytes(id, 0, id.length); + CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); + + Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); + int indexCreatedVersion = input.readVInt(); + if (luceneVersion.major < indexCreatedVersion) { + throw new CorruptIndexException("Creation version [" + indexCreatedVersion + + ".x] can't be greater than the version that wrote the segment infos: [" + luceneVersion + "]" , input); + } + + if (indexCreatedVersion < Version.LATEST.major - 1) { + throw new IndexFormatTooOldException(input, "This index was initially created with Lucene " + + indexCreatedVersion + ".x while the current version is " + Version.LATEST + + " and Lucene only supports reading the current and previous major versions."); + } + + SegmentInfos infos = new SegmentInfos(indexCreatedVersion); + infos.id = id; + infos.generation = generation; + infos.lastGeneration = generation; + infos.luceneVersion = luceneVersion; + + infos.version = input.readLong(); + //System.out.println("READ sis version=" + infos.version); + if (format > VERSION_70) { + infos.counter = input.readVLong(); + } else { + infos.counter = input.readInt(); + } + int numSegments = input.readInt(); + if (numSegments < 0) { + throw new CorruptIndexException("invalid segment count: " + numSegments, input); + } + + if (numSegments > 0) { + infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); + } else { + // else leave as null: no segments + } + + long totalDocs = 0; + for (int seg = 0; seg < numSegments; seg++) { + String segName = input.readString(); + byte[] segmentID = new byte[StringHelper.ID_LENGTH]; + input.readBytes(segmentID, 0, segmentID.length); + Codec codec = readCodec(input); + SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ); + info.setCodec(codec); + totalDocs += info.maxDoc(); + long delGen = input.readLong(); + int delCount = input.readInt(); + if (delCount < 0 || delCount > info.maxDoc()) { + throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input); + } + long fieldInfosGen = input.readLong(); + long dvGen = input.readLong(); + int softDelCount = format > VERSION_72 ? input.readInt() : 0; + if (softDelCount < 0 || softDelCount > info.maxDoc()) { + throw new CorruptIndexException("invalid deletion count: " + softDelCount + " vs maxDoc=" + info.maxDoc(), input); + } + if (softDelCount + delCount > info.maxDoc()) { + throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input); + } + final byte[] sciId; + if (format > VERSION_74) { + byte marker = input.readByte(); + switch (marker) { + case 1: + sciId = new byte[StringHelper.ID_LENGTH]; + input.readBytes(sciId, 0, sciId.length); + break; + case 0: + sciId = null; + break; + default: + throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input); + } + } else { + sciId = null; + } + SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId); + siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); + final Map> dvUpdateFiles; + final int numDVFields = input.readInt(); + if (numDVFields == 0) { + dvUpdateFiles = Collections.emptyMap(); + } else { + Map> map = new HashMap<>(numDVFields); + for (int i = 0; i < numDVFields; i++) { + map.put(input.readInt(), input.readSetOfStrings()); + } + dvUpdateFiles = Collections.unmodifiableMap(map); + } + siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); + infos.add(siPerCommit); + + Version segmentVersion = info.getVersion(); + + if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { + throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input); + } + + if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) { + throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input); + } + + if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) { + throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input); + } + } + + infos.userData = input.readMapOfStrings(); + + CodecUtil.checkFooter(input); + + // LUCENE-6299: check we are in bounds + if (totalDocs > IndexWriter.getActualMaxDocs()) { + throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); + } + + return infos; } private static Codec readCodec(DataInput input) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java index 23c98ad7acd..19d821481e0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java @@ -18,16 +18,12 @@ package org.apache.lucene.index; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.search.Sort; import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; -import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.Version; import java.io.IOException; @@ -182,62 +178,5 @@ public class TestSegmentInfos extends LuceneTestCase { assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId())); } } - - public void testBitFlippedTriggersCorruptIndexException() throws IOException { - BaseDirectoryWrapper dir = newDirectory(); - dir.setCheckIndexOnClose(false); - byte id[] = StringHelper.randomId(); - Codec codec = Codec.getDefault(); - - SegmentInfos sis = new SegmentInfos(Version.LATEST.major); - SegmentInfo info = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, Codec.getDefault(), - Collections.emptyMap(), id, Collections.emptyMap(), null); - info.setFiles(Collections.emptySet()); - codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId()); - sis.add(commitInfo); - - info = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_1", 1, false, Codec.getDefault(), - Collections.emptyMap(), id, Collections.emptyMap(), null); - info.setFiles(Collections.emptySet()); - codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1, StringHelper.randomId()); - sis.add(commitInfo); - - sis.commit(dir); - - BaseDirectoryWrapper corruptDir = newDirectory(); - corruptDir.setCheckIndexOnClose(false); - boolean corrupt = false; - for (String file : dir.listAll()) { - if (file.startsWith(IndexFileNames.SEGMENTS)) { - try (IndexInput in = dir.openInput(file, IOContext.DEFAULT); - IndexOutput out = corruptDir.createOutput(file, IOContext.DEFAULT)) { - final long corruptIndex = TestUtil.nextLong(random(), 0, in.length() - 1); - out.copyBytes(in, corruptIndex); - final int b = Byte.toUnsignedInt(in.readByte()) + TestUtil.nextInt(random(), 0x01, 0xff); - out.writeByte((byte) b); - out.copyBytes(in, in.length() - in.getFilePointer()); - } - try (IndexInput in = corruptDir.openInput(file, IOContext.DEFAULT)) { - CodecUtil.checksumEntireFile(in); - if (VERBOSE) { - System.out.println("TEST: Altering the file did not update the checksum, aborting..."); - } - return; - } catch (CorruptIndexException e) { - // ok - } - corrupt = true; - } else if (slowFileExists(corruptDir, file) == false) { // extraFS - corruptDir.copyFrom(dir, file, file, IOContext.DEFAULT); - } - } - assertTrue("No segments file found", corrupt); - - expectThrows(CorruptIndexException.class, () -> SegmentInfos.readLatestCommit(corruptDir)); - dir.close(); - corruptDir.close(); - } }