Never delete translog-N.tlog file when creation fails
We today delete the translog-N.tlog file if any subsequent operation fails but we might actually be in a good state if for instance the creation of the writer failes after we sucessfully baked the new translog generation into the checkpoint. In this situation we used to delete the translog-N.tlog file and failed on the next recovery of the translog with a NoSuchFileException | FileNotFoundException just like in https://discuss.elastic.co/t/cannot-recover-index-because-of-missing-tanslog-files/38336 This commit changes the behavior and cleans up that limbo state on recovery if we already have a generation+1 file written but not baked into the checkpoint we remove that file but only if the previous ckp file has already been renamed otherwise we know we can't be in this state.
This commit is contained in:
parent
8081c782ef
commit
12b93e72f0
|
@ -163,6 +163,21 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
|
||||||
try {
|
try {
|
||||||
if (translogGeneration != null) {
|
if (translogGeneration != null) {
|
||||||
final Checkpoint checkpoint = readCheckpoint();
|
final Checkpoint checkpoint = readCheckpoint();
|
||||||
|
final Path nextTranslogFile = location.resolve(getFilename(checkpoint.generation + 1));
|
||||||
|
final Path currentCheckpointFile = location.resolve(getCommitCheckpointFileName(checkpoint.generation));
|
||||||
|
// this is special handling for error condition when we create a new writer but we fail to bake
|
||||||
|
// the newly written file (generation+1) into the checkpoint. This is still a valid state
|
||||||
|
// we just need to cleanup before we continue
|
||||||
|
// we hit this before and then blindly deleted the new generation even though we managed to bake it in and then hit this:
|
||||||
|
// https://discuss.elastic.co/t/cannot-recover-index-because-of-missing-tanslog-files/38336 as an example
|
||||||
|
//
|
||||||
|
// For this to happen we must have already copied the translog.ckp file into translog-gen.ckp so we first check if that file exists
|
||||||
|
// if not we don't even try to clean it up and wait until we fail creating it
|
||||||
|
assert Files.exists(nextTranslogFile) == false || Files.size(nextTranslogFile) <= TranslogWriter.getHeaderLength(translogUUID);
|
||||||
|
if (Files.exists(currentCheckpointFile) // current checkpoint is already copied
|
||||||
|
&& Files.deleteIfExists(nextTranslogFile)) { // delete it and log a warning
|
||||||
|
logger.warn("Deleted invalid next generation before opening writer {} this is like caused by some previously tragic exception ", nextTranslogFile.getFileName());
|
||||||
|
}
|
||||||
this.recoveredTranslogs = recoverFromFiles(translogGeneration, checkpoint);
|
this.recoveredTranslogs = recoverFromFiles(translogGeneration, checkpoint);
|
||||||
if (recoveredTranslogs.isEmpty()) {
|
if (recoveredTranslogs.isEmpty()) {
|
||||||
throw new IllegalStateException("at least one reader must be recovered");
|
throw new IllegalStateException("at least one reader must be recovered");
|
||||||
|
|
|
@ -69,9 +69,17 @@ public class TranslogWriter extends TranslogReader {
|
||||||
totalOffset = lastSyncedOffset;
|
totalOffset = lastSyncedOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int getHeaderLength(String translogUUID) {
|
||||||
|
return getHeaderLength(new BytesRef(translogUUID).length);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getHeaderLength(int uuidLength) {
|
||||||
|
return CodecUtil.headerLength(TRANSLOG_CODEC) + uuidLength + RamUsageEstimator.NUM_BYTES_INT;
|
||||||
|
}
|
||||||
|
|
||||||
public static TranslogWriter create(ShardId shardId, String translogUUID, long fileGeneration, Path file, Callback<ChannelReference> onClose, ChannelFactory channelFactory, ByteSizeValue bufferSize) throws IOException {
|
public static TranslogWriter create(ShardId shardId, String translogUUID, long fileGeneration, Path file, Callback<ChannelReference> onClose, ChannelFactory channelFactory, ByteSizeValue bufferSize) throws IOException {
|
||||||
final BytesRef ref = new BytesRef(translogUUID);
|
final BytesRef ref = new BytesRef(translogUUID);
|
||||||
final int headerLength = CodecUtil.headerLength(TRANSLOG_CODEC) + ref.length + RamUsageEstimator.NUM_BYTES_INT;
|
final int headerLength = getHeaderLength(ref.length);
|
||||||
final FileChannel channel = channelFactory.open(file);
|
final FileChannel channel = channelFactory.open(file);
|
||||||
try {
|
try {
|
||||||
// This OutputStreamDataOutput is intentionally not closed because
|
// This OutputStreamDataOutput is intentionally not closed because
|
||||||
|
@ -80,17 +88,14 @@ public class TranslogWriter extends TranslogReader {
|
||||||
CodecUtil.writeHeader(out, TRANSLOG_CODEC, VERSION);
|
CodecUtil.writeHeader(out, TRANSLOG_CODEC, VERSION);
|
||||||
out.writeInt(ref.length);
|
out.writeInt(ref.length);
|
||||||
out.writeBytes(ref.bytes, ref.offset, ref.length);
|
out.writeBytes(ref.bytes, ref.offset, ref.length);
|
||||||
channel.force(false);
|
channel.force(true);
|
||||||
writeCheckpoint(headerLength, 0, file.getParent(), fileGeneration, StandardOpenOption.WRITE);
|
writeCheckpoint(headerLength, 0, file.getParent(), fileGeneration, StandardOpenOption.WRITE);
|
||||||
final TranslogWriter writer = new TranslogWriter(shardId, fileGeneration, new ChannelReference(file, fileGeneration, channel, onClose), bufferSize);
|
final TranslogWriter writer = new TranslogWriter(shardId, fileGeneration, new ChannelReference(file, fileGeneration, channel, onClose), bufferSize);
|
||||||
return writer;
|
return writer;
|
||||||
} catch (Throwable throwable){
|
} catch (Throwable throwable){
|
||||||
|
// if we fail to bake the file-generation into the checkpoint we stick with the file and once we recover and that
|
||||||
|
// file exists we remove it. We only apply this logic to the checkpoint.generation+1 any other file with a higher generation is an error condition
|
||||||
IOUtils.closeWhileHandlingException(channel);
|
IOUtils.closeWhileHandlingException(channel);
|
||||||
try {
|
|
||||||
Files.delete(file); // remove the file as well
|
|
||||||
} catch (IOException ex) {
|
|
||||||
throwable.addSuppressed(ex);
|
|
||||||
}
|
|
||||||
throw throwable;
|
throw throwable;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,6 +55,7 @@ import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.file.FileAlreadyExistsException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.InvalidPathException;
|
import java.nio.file.InvalidPathException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -634,7 +635,9 @@ public class TranslogTests extends ESTestCase {
|
||||||
assertFileIsPresent(translog, 1);
|
assertFileIsPresent(translog, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests that concurrent readers and writes maintain view and snapshot semantics */
|
/**
|
||||||
|
* Tests that concurrent readers and writes maintain view and snapshot semantics
|
||||||
|
*/
|
||||||
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
|
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
|
||||||
final Thread[] writers = new Thread[randomIntBetween(1, 10)];
|
final Thread[] writers = new Thread[randomIntBetween(1, 10)];
|
||||||
final Thread[] readers = new Thread[randomIntBetween(1, 10)];
|
final Thread[] readers = new Thread[randomIntBetween(1, 10)];
|
||||||
|
@ -1094,7 +1097,7 @@ public class TranslogTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRecoveryUncommittedCorryptedCheckpoint() throws IOException {
|
public void testRecoveryUncommittedCorruptedCheckpoint() throws IOException {
|
||||||
List<Translog.Location> locations = new ArrayList<>();
|
List<Translog.Location> locations = new ArrayList<>();
|
||||||
int translogOperations = 100;
|
int translogOperations = 100;
|
||||||
final int prepareOp = 44;
|
final int prepareOp = 44;
|
||||||
|
@ -1307,7 +1310,8 @@ public class TranslogTests extends ESTestCase {
|
||||||
return translog.add(op);
|
return translog.add(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void afterAdd() throws IOException {}
|
protected void afterAdd() throws IOException {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFailFlush() throws IOException {
|
public void testFailFlush() throws IOException {
|
||||||
|
@ -1520,6 +1524,7 @@ public class TranslogTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Translog getFailableTranslog(final AtomicBoolean fail, final TranslogConfig config) throws IOException {
|
private Translog getFailableTranslog(final AtomicBoolean fail, final TranslogConfig config) throws IOException {
|
||||||
return getFailableTranslog(fail, config, randomBoolean(), false);
|
return getFailableTranslog(fail, config, randomBoolean(), false);
|
||||||
}
|
}
|
||||||
|
@ -1613,4 +1618,97 @@ public class TranslogTests extends ESTestCase {
|
||||||
// all is well
|
// all is well
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRecoverWithUnbackedNextGen() throws IOException {
|
||||||
|
translog.add(new Translog.Index("test", "" + 0, Integer.toString(0).getBytes(Charset.forName("UTF-8"))));
|
||||||
|
Translog.TranslogGeneration translogGeneration = translog.getGeneration();
|
||||||
|
translog.close();
|
||||||
|
TranslogConfig config = translog.getConfig();
|
||||||
|
|
||||||
|
Path ckp = config.getTranslogPath().resolve(Translog.CHECKPOINT_FILE_NAME);
|
||||||
|
Checkpoint read = Checkpoint.read(ckp);
|
||||||
|
Files.copy(ckp, config.getTranslogPath().resolve(Translog.getCommitCheckpointFileName(read.generation)));
|
||||||
|
Files.createFile(config.getTranslogPath().resolve("translog-" + (read.generation + 1) + ".tlog"));
|
||||||
|
config.setTranslogGeneration(translogGeneration);
|
||||||
|
try (Translog tlog = new Translog(config)) {
|
||||||
|
assertNotNull(translogGeneration);
|
||||||
|
assertFalse(tlog.syncNeeded());
|
||||||
|
try (Translog.Snapshot snapshot = tlog.newSnapshot()) {
|
||||||
|
for (int i = 0; i < 1; i++) {
|
||||||
|
Translog.Operation next = snapshot.next();
|
||||||
|
assertNotNull("operation " + i + " must be non-null", next);
|
||||||
|
assertEquals("payload missmatch", i, Integer.parseInt(next.getSource().source.toUtf8()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tlog.add(new Translog.Index("test", "" + 1, Integer.toString(1).getBytes(Charset.forName("UTF-8"))));
|
||||||
|
}
|
||||||
|
try (Translog tlog = new Translog(config)) {
|
||||||
|
assertNotNull(translogGeneration);
|
||||||
|
assertFalse(tlog.syncNeeded());
|
||||||
|
try (Translog.Snapshot snapshot = tlog.newSnapshot()) {
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
Translog.Operation next = snapshot.next();
|
||||||
|
assertNotNull("operation " + i + " must be non-null", next);
|
||||||
|
assertEquals("payload missmatch", i, Integer.parseInt(next.getSource().source.toUtf8()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRecoverWithUnbackedNextGenInIllegalState() throws IOException {
|
||||||
|
translog.add(new Translog.Index("test", "" + 0, Integer.toString(0).getBytes(Charset.forName("UTF-8"))));
|
||||||
|
Translog.TranslogGeneration translogGeneration = translog.getGeneration();
|
||||||
|
translog.close();
|
||||||
|
TranslogConfig config = translog.getConfig();
|
||||||
|
Path ckp = config.getTranslogPath().resolve(Translog.CHECKPOINT_FILE_NAME);
|
||||||
|
Checkpoint read = Checkpoint.read(ckp);
|
||||||
|
// don't copy the new file
|
||||||
|
Files.createFile(config.getTranslogPath().resolve("translog-" + (read.generation + 1) + ".tlog"));
|
||||||
|
config.setTranslogGeneration(translogGeneration);
|
||||||
|
|
||||||
|
try {
|
||||||
|
Translog tlog = new Translog(config);
|
||||||
|
fail("file already exists?");
|
||||||
|
} catch (TranslogException ex) {
|
||||||
|
// all is well
|
||||||
|
assertEquals(ex.getMessage(), "failed to create new translog file");
|
||||||
|
assertEquals(ex.getCause().getClass(), FileAlreadyExistsException.class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public void testRecoverWithUnbackedNextGenAndFutureFile() throws IOException {
|
||||||
|
translog.add(new Translog.Index("test", "" + 0, Integer.toString(0).getBytes(Charset.forName("UTF-8"))));
|
||||||
|
Translog.TranslogGeneration translogGeneration = translog.getGeneration();
|
||||||
|
translog.close();
|
||||||
|
TranslogConfig config = translog.getConfig();
|
||||||
|
|
||||||
|
Path ckp = config.getTranslogPath().resolve(Translog.CHECKPOINT_FILE_NAME);
|
||||||
|
Checkpoint read = Checkpoint.read(ckp);
|
||||||
|
Files.copy(ckp, config.getTranslogPath().resolve(Translog.getCommitCheckpointFileName(read.generation)));
|
||||||
|
Files.createFile(config.getTranslogPath().resolve("translog-" + (read.generation + 1) + ".tlog"));
|
||||||
|
// we add N+1 and N+2 to ensure we only delete the N+1 file and never jump ahead and wipe without the right condition
|
||||||
|
Files.createFile(config.getTranslogPath().resolve("translog-" + (read.generation + 2) + ".tlog"));
|
||||||
|
config.setTranslogGeneration(translogGeneration);
|
||||||
|
try (Translog tlog = new Translog(config)) {
|
||||||
|
assertNotNull(translogGeneration);
|
||||||
|
assertFalse(tlog.syncNeeded());
|
||||||
|
try (Translog.Snapshot snapshot = tlog.newSnapshot()) {
|
||||||
|
for (int i = 0; i < 1; i++) {
|
||||||
|
Translog.Operation next = snapshot.next();
|
||||||
|
assertNotNull("operation " + i + " must be non-null", next);
|
||||||
|
assertEquals("payload missmatch", i, Integer.parseInt(next.getSource().source.toUtf8()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tlog.add(new Translog.Index("test", "" + 1, Integer.toString(1).getBytes(Charset.forName("UTF-8"))));
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
Translog tlog = new Translog(config);
|
||||||
|
fail("file already exists?");
|
||||||
|
} catch (TranslogException ex) {
|
||||||
|
// all is well
|
||||||
|
assertEquals(ex.getMessage(), "failed to create new translog file");
|
||||||
|
assertEquals(ex.getCause().getClass(), FileAlreadyExistsException.class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue