Fix CheckIndex to detect major corruption with old (not the latest) commit point (#12530)

* #7820: add initial (failing) test case exposing the bug in CheckIndex

* #7820: initial fix to CheckIndex to detect 'on load' corruption of older segments files

* exclamation point police

* tidy

* add missing @Override in new test case

* fold feedback: merge SIS-loading logic into single for-loop; rename sis -> lastCommit

* tidy

* sidestep segments.gen file as well in case we are reading very old index

* tidy

* always load last commit point first so if it has an issue that will not be masked by issues with older commit points
This commit is contained in:
Michael McCandless 2023-11-17 12:50:39 -05:00 committed by GitHub
parent c228e4bb66
commit de820b67cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 189 additions and 39 deletions

View File

@ -29,6 +29,7 @@ import java.text.NumberFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -586,7 +587,6 @@ public final class CheckIndex implements Closeable {
ensureOpen(); ensureOpen();
long startNS = System.nanoTime(); long startNS = System.nanoTime();
SegmentInfos sis = null;
Status result = new Status(); Status result = new Status();
result.dir = dir; result.dir = dir;
String[] files = dir.listAll(); String[] files = dir.listAll();
@ -595,26 +595,99 @@ public final class CheckIndex implements Closeable {
throw new IndexNotFoundException( throw new IndexNotFoundException(
"no segments* file found in " + dir + ": files: " + Arrays.toString(files)); "no segments* file found in " + dir + ": files: " + Arrays.toString(files));
} }
try {
// Do not use SegmentInfos.read(Directory) since the spooky // https://github.com/apache/lucene/issues/7820: also attempt to open any older commit
// retrying it does is not necessary here (we hold the write lock): // points (segments_N), which will catch certain corruption like missing _N.si files
sis = // for segments not also referenced by the newest commit point (which was already
SegmentInfos.readCommit( // loaded, successfully, above). Note that we do not do a deeper check of segments
dir, lastSegmentsFile, 0 /* always open old indices if codecs are around */); // referenced ONLY by these older commit points, because such corruption would not
} catch (Throwable t) { // prevent a new IndexWriter from opening on the newest commit point. but it is still
if (failFast) { // corruption, e.g. a reader opened on those old commit points can hit corruption
throw IOUtils.rethrowAlways(t); // exceptions which we (still) will not detect here. progress not perfection!
SegmentInfos lastCommit = null;
List<String> allSegmentsFiles = new ArrayList<>();
for (String fileName : files) {
if (fileName.startsWith(IndexFileNames.SEGMENTS)
&& fileName.equals(SegmentInfos.OLD_SEGMENTS_GEN) == false) {
allSegmentsFiles.add(fileName);
} }
}
// Sort descending by generation so that we always attempt to read the last commit first. This
// way if an index has a broken last commit AND a broken old commit, we report the last commit
// error first:
allSegmentsFiles.sort(
new Comparator<String>() {
@Override
public int compare(String a, String b) {
long genA = SegmentInfos.generationFromSegmentsFileName(a);
long genB = SegmentInfos.generationFromSegmentsFileName(b);
// reversed natural sort (largest generation first):
return -Long.compare(genA, genB);
}
});
for (String fileName : allSegmentsFiles) {
boolean isLastCommit = fileName.equals(lastSegmentsFile);
SegmentInfos infos;
try {
// Do not use SegmentInfos.read(Directory) since the spooky
// retrying it does is not necessary here (we hold the write lock):
// always open old indices if codecs are around
infos = SegmentInfos.readCommit(dir, fileName, 0);
} catch (Throwable t) {
if (failFast) {
throw IOUtils.rethrowAlways(t);
}
String message;
if (isLastCommit) {
message =
"ERROR: could not read latest commit point from segments file \""
+ fileName
+ "\" in directory";
} else {
message =
"ERROR: could not read old (not latest) commit point segments file \""
+ fileName
+ "\" in directory";
}
msg(infoStream, message);
result.missingSegments = true;
if (infoStream != null) {
t.printStackTrace(infoStream);
}
return result;
}
if (isLastCommit) {
// record the latest commit point: we will deeply check all segments referenced by it
lastCommit = infos;
}
}
// we know there is a lastSegmentsFileName, so we must've attempted to load it in the above for
// loop. if it failed to load, we threw the exception (fastFail == true) or we returned the
// failure (fastFail == false). so if we get here, we should // always have a valid lastCommit:
assert lastCommit != null;
if (lastCommit == null) {
msg(infoStream, "ERROR: could not read any segments file in directory"); msg(infoStream, "ERROR: could not read any segments file in directory");
result.missingSegments = true; result.missingSegments = true;
if (infoStream != null) t.printStackTrace(infoStream);
return result; return result;
} }
if (infoStream != null) { if (infoStream != null) {
int maxDoc = 0; int maxDoc = 0;
int delCount = 0; int delCount = 0;
for (SegmentCommitInfo info : sis) { for (SegmentCommitInfo info : lastCommit) {
maxDoc += info.info.maxDoc(); maxDoc += info.info.maxDoc();
delCount += info.getDelCount(); delCount += info.getDelCount();
} }
@ -631,7 +704,7 @@ public final class CheckIndex implements Closeable {
Version oldest = null; Version oldest = null;
Version newest = null; Version newest = null;
String oldSegs = null; String oldSegs = null;
for (SegmentCommitInfo si : sis) { for (SegmentCommitInfo si : lastCommit) {
Version version = si.info.getVersion(); Version version = si.info.getVersion();
if (version == null) { if (version == null) {
// pre-3.1 segment // pre-3.1 segment
@ -646,14 +719,14 @@ public final class CheckIndex implements Closeable {
} }
} }
final int numSegments = sis.size(); final int numSegments = lastCommit.size();
final String segmentsFileName = sis.getSegmentsFileName(); final String segmentsFileName = lastCommit.getSegmentsFileName();
result.segmentsFileName = segmentsFileName; result.segmentsFileName = segmentsFileName;
result.numSegments = numSegments; result.numSegments = numSegments;
result.userData = sis.getUserData(); result.userData = lastCommit.getUserData();
String userDataString; String userDataString;
if (sis.getUserData().size() > 0) { if (lastCommit.getUserData().size() > 0) {
userDataString = " userData=" + sis.getUserData(); userDataString = " userData=" + lastCommit.getUserData();
} else { } else {
userDataString = ""; userDataString = "";
} }
@ -681,7 +754,7 @@ public final class CheckIndex implements Closeable {
+ " " + " "
+ versionString + versionString
+ " id=" + " id="
+ StringHelper.idToString(sis.getId()) + StringHelper.idToString(lastCommit.getId())
+ userDataString); + userDataString);
if (onlySegments != null) { if (onlySegments != null) {
@ -696,14 +769,14 @@ public final class CheckIndex implements Closeable {
msg(infoStream, ":"); msg(infoStream, ":");
} }
result.newSegments = sis.clone(); result.newSegments = lastCommit.clone();
result.newSegments.clear(); result.newSegments.clear();
result.maxSegmentName = -1; result.maxSegmentName = -1;
// checks segments sequentially // checks segments sequentially
if (executorService == null) { if (executorService == null) {
for (int i = 0; i < numSegments; i++) { for (int i = 0; i < numSegments; i++) {
final SegmentCommitInfo info = sis.info(i); final SegmentCommitInfo info = lastCommit.info(i);
updateMaxSegmentName(result, info); updateMaxSegmentName(result, info);
if (onlySegments != null && !onlySegments.contains(info.info.name)) { if (onlySegments != null && !onlySegments.contains(info.info.name)) {
continue; continue;
@ -718,7 +791,7 @@ public final class CheckIndex implements Closeable {
+ info.info.name + info.info.name
+ " maxDoc=" + " maxDoc="
+ info.info.maxDoc()); + info.info.maxDoc());
Status.SegmentInfoStatus segmentInfoStatus = testSegment(sis, info, infoStream); Status.SegmentInfoStatus segmentInfoStatus = testSegment(lastCommit, info, infoStream);
processSegmentInfoStatusResult(result, info, segmentInfoStatus); processSegmentInfoStatusResult(result, info, segmentInfoStatus);
} }
@ -729,7 +802,7 @@ public final class CheckIndex implements Closeable {
// checks segments concurrently // checks segments concurrently
List<SegmentCommitInfo> segmentCommitInfos = new ArrayList<>(); List<SegmentCommitInfo> segmentCommitInfos = new ArrayList<>();
for (SegmentCommitInfo sci : sis) { for (SegmentCommitInfo sci : lastCommit) {
segmentCommitInfos.add(sci); segmentCommitInfos.add(sci);
} }
@ -757,7 +830,7 @@ public final class CheckIndex implements Closeable {
continue; continue;
} }
SegmentInfos finalSis = sis; SegmentInfos finalSis = lastCommit;
ByteArrayOutputStream output = new ByteArrayOutputStream(); ByteArrayOutputStream output = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(output, true, IOUtils.UTF_8); PrintStream stream = new PrintStream(output, true, IOUtils.UTF_8);
@ -813,7 +886,7 @@ public final class CheckIndex implements Closeable {
if (0 == result.numBadSegments) { if (0 == result.numBadSegments) {
result.clean = true; result.clean = true;
} else } else {
msg( msg(
infoStream, infoStream,
"WARNING: " "WARNING: "
@ -821,14 +894,16 @@ public final class CheckIndex implements Closeable {
+ " broken segments (containing " + " broken segments (containing "
+ result.totLoseDocCount + result.totLoseDocCount
+ " documents) detected"); + " documents) detected");
}
if (!(result.validCounter = (result.maxSegmentName < sis.counter))) { result.validCounter = result.maxSegmentName < lastCommit.counter;
if (result.validCounter == false) {
result.clean = false; result.clean = false;
result.newSegments.counter = result.maxSegmentName + 1; result.newSegments.counter = result.maxSegmentName + 1;
msg( msg(
infoStream, infoStream,
"ERROR: Next segment name counter " "ERROR: Next segment name counter "
+ sis.counter + lastCommit.counter
+ " is not greater than max segment name " + " is not greater than max segment name "
+ result.maxSegmentName); + result.maxSegmentName);
} }
@ -921,7 +996,7 @@ public final class CheckIndex implements Closeable {
msg(infoStream, " diagnostics = " + diagnostics); msg(infoStream, " diagnostics = " + diagnostics);
} }
if (!info.hasDeletions()) { if (info.hasDeletions() == false) {
msg(infoStream, " no deletions"); msg(infoStream, " no deletions");
segInfoStat.hasDeletions = false; segInfoStat.hasDeletions = false;
} else { } else {
@ -1213,7 +1288,7 @@ public final class CheckIndex implements Closeable {
if (liveDocs != null) { if (liveDocs != null) {
// it's ok for it to be non-null here, as long as none are set right? // it's ok for it to be non-null here, as long as none are set right?
for (int j = 0; j < liveDocs.length(); j++) { for (int j = 0; j < liveDocs.length(); j++) {
if (!liveDocs.get(j)) { if (liveDocs.get(j) == false) {
throw new CheckIndexException( throw new CheckIndexException(
"liveDocs mismatch: info says no deletions but doc " + j + " is deleted."); "liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
} }
@ -1450,7 +1525,7 @@ public final class CheckIndex implements Closeable {
+ hasFreqs); + hasFreqs);
} }
if (!isVectors) { if (isVectors == false) {
final boolean expectedHasPositions = final boolean expectedHasPositions =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
if (hasPositions != expectedHasPositions) { if (hasPositions != expectedHasPositions) {
@ -1810,7 +1885,7 @@ public final class CheckIndex implements Closeable {
// free-for-all before? // free-for-all before?
// but for offsets in the postings lists these checks are fine: they were always // but for offsets in the postings lists these checks are fine: they were always
// enforced by IndexWriter // enforced by IndexWriter
if (!isVectors) { if (isVectors == false) {
if (startOffset < 0) { if (startOffset < 0) {
throw new CheckIndexException( throw new CheckIndexException(
"term " "term "
@ -3660,7 +3735,7 @@ public final class CheckIndex implements Closeable {
// Make sure FieldInfo thinks this field is vector'd: // Make sure FieldInfo thinks this field is vector'd:
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
if (!fieldInfo.hasVectors()) { if (fieldInfo.hasVectors() == false) {
throw new CheckIndexException( throw new CheckIndexException(
"docID=" "docID="
+ j + j
@ -3696,7 +3771,7 @@ public final class CheckIndex implements Closeable {
postings = termsEnum.postings(postings, PostingsEnum.ALL); postings = termsEnum.postings(postings, PostingsEnum.ALL);
assert postings != null; assert postings != null;
if (!postingsTermsEnum.seekExact(term)) { if (postingsTermsEnum.seekExact(term) == false) {
throw new CheckIndexException( throw new CheckIndexException(
"vector term=" "vector term="
+ term + term
@ -3852,7 +3927,7 @@ public final class CheckIndex implements Closeable {
+ " but postings does not."); + " but postings does not.");
} }
BytesRef postingsPayload = postingsDocs.getPayload(); BytesRef postingsPayload = postingsDocs.getPayload();
if (!payload.equals(postingsPayload)) { if (payload.equals(postingsPayload) == false) {
throw new CheckIndexException( throw new CheckIndexException(
"vector term=" "vector term="
+ term + term
@ -4011,9 +4086,10 @@ public final class CheckIndex implements Closeable {
return 1; return 1;
} }
if (!assertsOn()) if (assertsOn() == false) {
System.out.println( System.out.println(
"\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled"); "\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
}
System.out.println("\nOpening index @ " + opts.indexPath + "\n"); System.out.println("\nOpening index @ " + opts.indexPath + "\n");
Directory directory = null; Directory directory = null;
@ -4166,8 +4242,8 @@ public final class CheckIndex implements Closeable {
return 1; return 1;
} }
if (!result.clean) { if (result.clean == false) {
if (!opts.doExorcise) { if (opts.doExorcise == false) {
opts.out.println( opts.out.println(
"WARNING: would write new segments file, and " "WARNING: would write new segments file, and "
+ result.totLoseDocCount + result.totLoseDocCount

View File

@ -122,7 +122,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
static final int VERSION_CURRENT = VERSION_86; static final int VERSION_CURRENT = VERSION_86;
/** Name of the generation reference file name */ /** Name of the generation reference file name */
private static final String OLD_SEGMENTS_GEN = "segments.gen"; static final String OLD_SEGMENTS_GEN = "segments.gen";
/** Used to name new segments. */ /** Used to name new segments. */
public long counter; public long counter;
@ -146,7 +146,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
* *
* @see #setInfoStream * @see #setInfoStream
*/ */
private static PrintStream infoStream = null; private static PrintStream infoStream;
/** Id for this commit; only written starting with Lucene 5.0 */ /** Id for this commit; only written starting with Lucene 5.0 */
private byte[] id; private byte[] id;

View File

@ -18,6 +18,7 @@ package org.apache.lucene.index;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import org.apache.lucene.document.BinaryPoint; import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
@ -34,6 +35,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.CannedTokenStream; import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token; import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.BaseTestCheckIndex; import org.apache.lucene.tests.index.BaseTestCheckIndex;
import org.apache.lucene.tests.store.MockDirectoryWrapper;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
@ -218,4 +220,76 @@ public class TestCheckIndex extends BaseTestCheckIndex {
VectorUtil.l2normalize(v); VectorUtil.l2normalize(v);
return v; return v;
} }
// Never deletes any commit points! Do not use in production!!
private static class DeleteNothingIndexDeletionPolicy extends IndexDeletionPolicy {
public static final IndexDeletionPolicy INSTANCE = new DeleteNothingIndexDeletionPolicy();
private DeleteNothingIndexDeletionPolicy() {
// NO
}
@Override
public void onInit(List<? extends IndexCommit> commits) {}
@Override
public void onCommit(List<? extends IndexCommit> commits) {}
}
// https://github.com/apache/lucene/issues/7820 -- when the most recent commit point in
// the index is OK, but older commit points are broken, CheckIndex fails to detect and
// correct that, while opening an IndexWriter on the index will fail since IndexWriter
// loads all commit points on init
public void testPriorBrokenCommitPoint() throws Exception {
try (MockDirectoryWrapper dir = newMockDirectory()) {
// disable this normally useful test infra feature since this test intentionally leaves broken
// indices:
dir.setCheckIndexOnClose(false);
IndexWriterConfig iwc =
new IndexWriterConfig()
.setMergePolicy(NoMergePolicy.INSTANCE)
.setIndexDeletionPolicy(DeleteNothingIndexDeletionPolicy.INSTANCE);
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
// create first segment, and commit point referencing only segment 0
Document doc = new Document();
doc.add(new StringField("id", "a", Field.Store.NO));
iw.addDocument(doc);
iw.commit();
// NOTE: we are (illegally) relying on precise file naming here -- if Codec or IW's
// behaviour changes, this may need fixing:
assertTrue(slowFileExists(dir, "_0.si"));
// create second segment, and another commit point referencing only segment 1
doc.add(new StringField("id", "a", Field.Store.NO));
iw.updateDocument(new Term("id", "a"), doc);
iw.commit();
// NOTE: we are (illegally) relying on precise file naming here -- if Codec or IW's
// behaviour changes, this may need fixing:
assertTrue(slowFileExists(dir, "_0.si"));
assertTrue(slowFileExists(dir, "_1.si"));
}
try (CheckIndex checkers = new CheckIndex(dir)) {
CheckIndex.Status checkIndexStatus = checkers.checkIndex();
assertTrue(checkIndexStatus.clean);
}
// now corrupt segment 0, which is referenced by only the first commit point, by removing its
// .si file (_0.si)
dir.deleteFile("_0.si");
try (CheckIndex checkers = new CheckIndex(dir)) {
CheckIndex.Status checkIndexStatus = checkers.checkIndex();
assertFalse(checkIndexStatus.clean);
}
}
}
} }