[CORE] Handle truncated translog gracefully

We used to handle truncated translogs in a better manner (assuming that
the node was killed halfway through writing an operation and discarding
the last operation). This brings back that behavior by catching an
`EOFException` during the stream reading and throwing a
`TruncatedTranslogException` which can be safely ignored in
`IndexShardGateway`.

Fixes #9699
This commit is contained in:
Lee Hinman 2015-02-20 10:40:56 -07:00
parent a064f57bc2
commit 94a74ddaec
5 changed files with 70 additions and 2 deletions

View File

@ -220,7 +220,7 @@ public class IndexShardGateway extends AbstractIndexShardComponent implements Cl
in.readInt(); // ignored opSize
}
operation = stream.read(in);
} catch (EOFException e) {
} catch (TruncatedTranslogException|EOFException e) {
// ignore, not properly written the last op
logger.trace("ignoring translog EOF exception, the last operation was not properly written", e);
break;

View File

@ -65,6 +65,8 @@ public class ChecksummedTranslogStream implements TranslogStream {
Translog.Operation.Type type = Translog.Operation.Type.fromId(in.readByte());
operation = TranslogStreams.newOperationFromType(type);
operation.readFrom(in);
} catch (EOFException e) {
throw new TruncatedTranslogException("reached premature end of file, translog is truncated", e);
} catch (AssertionError|Exception e) {
throw new TranslogCorruptedException("translog corruption while reading from stream", e);
}

View File

@ -35,6 +35,7 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
@ -528,6 +529,50 @@ public abstract class AbstractSimpleTranslogTests extends ElasticsearchTestCase
assertThat("at least one corruption was caused and caught", corruptionsCaught.get(), greaterThanOrEqualTo(1));
}
/**
 * Writes a random batch of create operations, syncs them, truncates the
 * translog files on disk and then re-reads every recorded location. A read
 * failure whose cause is an {@link EOFException} is counted as a detected
 * truncation; any other failure is rethrown. At least one truncation must
 * have been observed for the test to pass.
 */
@Test
public void testTruncatedTranslogs() throws Exception {
    final List<Translog.Location> written = newArrayList();
    final int opCount = randomIntBetween(10, 100);
    for (int id = 0; id < opCount; id++) {
        final String payload = randomAsciiOfLengthBetween(1, 50);
        final Translog.Create create = new Translog.Create("test", Integer.toString(id), payload.getBytes("UTF-8"));
        written.add(translog.add(create));
    }
    translog.sync();

    // Chop bytes off the end of the on-disk files before reading them back.
    truncateTranslogs(translogFileDirectory());

    int truncationsSeen = 0;
    for (Translog.Location where : written) {
        try {
            translog.read(where);
        } catch (ElasticsearchException failure) {
            // Only an EOF-caused failure counts as a truncation; anything else is unexpected.
            if (!(failure.getCause() instanceof EOFException)) {
                throw failure;
            }
            truncationsSeen++;
        }
    }
    assertThat("at least one truncation was caused and caught", truncationsSeen, greaterThanOrEqualTo(1));
}
/**
 * Randomly truncate some bytes from the end of each translog file.
 * <p>
 * Every file matching {@code translog-*} in {@code directory} is shrunk by a
 * random amount between 1 byte and half of its current size. Files smaller
 * than two bytes are left untouched because there is no valid amount to cut.
 *
 * @param directory directory containing the translog files to truncate
 * @throws Exception if a file cannot be listed, opened or truncated
 */
private void truncateTranslogs(Path directory) throws Exception {
    Path[] files = FileSystemUtils.files(directory, "translog-*");
    for (Path file : files) {
        try (FileChannel f = FileChannel.open(file, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
            long prevSize = f.size();
            if (prevSize < 2) {
                // Half the size rounds down to 0, which would request the
                // empty range [1, 0] from the random helper.
                continue;
            }
            // Divide as a long first, then clamp: casting before dividing
            // would narrow the size to int before it is halved.
            int maxCut = (int) Math.min(prevSize / 2, Integer.MAX_VALUE);
            long newSize = prevSize - randomIntBetween(1, maxCut);
            logger.info("--> truncating {}, prev: {}, now: {}", file, prevSize, newSize);
            f.truncate(newSize);
        }
    }
}
/**
* Randomly overwrite some bytes in the translog files
*/

View File

@ -25,7 +25,6 @@ import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.EOFException;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
@ -146,4 +145,26 @@ public class TranslogVersionTests extends ElasticsearchTestCase {
}
}
/**
 * Reads operations from a bundled, truncated v1 translog file until the
 * stream either signals a plain {@link EOFException} (which ends the loop and
 * fails the test) or throws a {@code TruncatedTranslogException}, whose
 * message is then checked.
 */
@Test
public void testTruncatedTranslog() throws Exception {
    try {
        final Path truncatedFile = getResourcePath("/org/elasticsearch/index/translog/translog-v1-truncated.binary");
        assertThat("test file should exist", Files.exists(truncatedFile), equalTo(true));
        final TranslogStream translogStream = TranslogStreams.translogStreamFor(truncatedFile);
        try (StreamInput input = translogStream.openInput(truncatedFile)) {
            boolean reachedEnd = false;
            while (!reachedEnd) {
                try {
                    translogStream.read(input);
                } catch (EOFException eof) {
                    // A bare EOF stops the loop; the fail() below then reports the missing exception.
                    reachedEnd = true;
                }
            }
        }
        fail("should have thrown an exception about the body being truncated");
    } catch (TruncatedTranslogException truncated) {
        assertThat("translog truncated: " + truncated.getMessage(),
                truncated.getMessage().contains("reached premature end of file, translog is truncated"),
                equalTo(true));
    }
}
}