LUCENE-9477: Don't leave potentially broken segments file behind (#1777)

If we fail to rollback an already renamed pending segments file during
commit due to a failure in directory syncing we might not fully roll back
to a proper state if we hit a failure during rollback which leaves the index
in a broken state. This is a best effort approach to remove the renamed file
in the case of a failure during sync.
This commit is contained in:
Simon Willnauer 2020-08-24 20:19:44 +02:00 committed by GitHub
parent 8294e1ae20
commit da095bc7da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 78 additions and 7 deletions

View File

@ -786,7 +786,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
// Must carefully compute fileName from "generation"
// since lastGeneration isn't incremented:
final String pending = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", generation);
// Suppress so we keep throwing the original exception
// in our caller
IOUtils.deleteFilesIgnoringExceptions(dir, pending);
@ -836,16 +835,24 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
if (pendingCommit == false) {
throw new IllegalStateException("prepareCommit was not called");
}
boolean success = false;
boolean successRenameAndSync = false;
final String dest;
try {
final String src = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", generation);
dest = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
dir.rename(src, dest);
dir.syncMetaData();
success = true;
try {
dir.syncMetaData();
successRenameAndSync = true;
} finally {
if (successRenameAndSync == false) {
// at this point we already created the file but missed to sync directory let's also remove the
// renamed file
IOUtils.deleteFilesIgnoringExceptions(dir, dest);
}
}
} finally {
if (!success) {
if (successRenameAndSync == false) {
// deletes pending_segments_N:
rollbackCommit(dir);
}

View File

@ -28,6 +28,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BooleanSupplier;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@ -40,6 +41,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
@ -2065,4 +2067,64 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
assertNull(e.getCause());
}
}
public void testExceptionOnSyncMetadata() throws IOException {
try (MockDirectoryWrapper dir = newMockDirectory()) {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig().setCommitOnClose(false));
writer.commit();
AtomicBoolean maybeFailDelete = new AtomicBoolean(false);
BooleanSupplier failDelete = () -> random().nextBoolean() && maybeFailDelete.get();
dir.failOn(new MockDirectoryWrapper.Failure() {
@Override
public void eval(MockDirectoryWrapper dir) {
if (callStackContains(MockDirectoryWrapper.class, "syncMetaData")
&& callStackContains(SegmentInfos.class, "finishCommit")) {
throw new RuntimeException("boom");
} else if (failDelete.getAsBoolean() &&
callStackContains(IndexWriter.class, "rollbackInternalNoCommit") && callStackContains(IndexFileDeleter.class, "deleteFiles")) {
throw new RuntimeException("bang");
}
}});
for (int i = 0; i < 5; i++) {
Document doc = new Document();
doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
doc.add(new NumericDocValuesField("dv", i));
doc.add(new BinaryDocValuesField("dv2", new BytesRef(Integer.toString(i))));
doc.add(new SortedDocValuesField("dv3", new BytesRef(Integer.toString(i))));
doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i))));
doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i - 1))));
doc.add(new SortedNumericDocValuesField("dv5", i));
doc.add(new SortedNumericDocValuesField("dv5", i - 1));
doc.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
// ensure we store something
doc.add(new StoredField("stored1", "foo"));
doc.add(new StoredField("stored1", "bar"));
// ensure we get some payloads
doc.add(newTextField("text_payloads", TestUtil.randomAnalysisString(random(), 6, true), Field.Store.NO));
// ensure we get some vectors
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
doc.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
doc.add(new IntPoint("point", random().nextInt()));
doc.add(new IntPoint("point2d", random().nextInt(), random().nextInt()));
writer.addDocument(new Document());
}
try {
writer.commit();
fail();
} catch (RuntimeException e) {
assertEquals("boom", e.getMessage());
}
try {
maybeFailDelete.set(true);
writer.rollback();
} catch (RuntimeException e) {
assertEquals("bang", e.getMessage());
}
maybeFailDelete.set(false);
assertTrue(writer.isClosed());
assertTrue(DirectoryReader.indexExists(dir));
DirectoryReader.open(dir).close();
}
}
}

View File

@ -224,7 +224,9 @@ public class TestIndexWriterOnVMError extends LuceneTestCase {
// assertTrue("hit OOM but writer is still open, WTF: ", writer.isClosed());
try {
writer.rollback();
} catch (Throwable t) {}
} catch (Throwable t) {
t.printStackTrace(log);
}
return (VirtualMachineError) e;
} else {
Rethrow.rethrow(disaster);

View File

@ -249,7 +249,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
maybeYield();
maybeThrowDeterministicException();
if (crashed) {
throw new IOException("cannot rename after crash");
throw new IOException("cannot sync metadata after crash");
}
in.syncMetaData();
}