HDFS-3335. check for edit log corruption at the end of the log. Contributed by Colin Patrick McCabe.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1338493 13f79535-47bb-0310-9956-ffa450edef68
commit 715b4cfad1
parent 201038272f
@@ -323,6 +323,9 @@ Release 2.0.0 - UNRELEASED
     HDFS-3404. Make putImage in GetImageServlet infer remote address to fetch
     from request. (atm)
 
+    HDFS-3335. check for edit log corruption at the end of the log
+    (Colin Patrick McCabe via todd)
+
   OPTIMIZATIONS
 
     HDFS-2477. Optimize computing the diff between a block report and the
@@ -108,7 +108,35 @@ public class EditLogFileInputStream extends EditLogInputStream {
 
   @Override
   protected FSEditLogOp nextOp() throws IOException {
-    return reader.readOp(false);
+    FSEditLogOp op = reader.readOp(false);
+    if ((op != null) && (op.hasTransactionId())) {
+      long txId = op.getTransactionId();
+      if ((txId >= lastTxId) &&
+          (lastTxId != HdfsConstants.INVALID_TXID)) {
+        //
+        // Sometimes, the NameNode crashes while it's writing to the
+        // edit log.  In that case, you can end up with an unfinalized edit log
+        // which has some garbage at the end.
+        // JournalManager#recoverUnfinalizedSegments will finalize these
+        // unfinished edit logs, giving them a defined final transaction
+        // ID.  Then they will be renamed, so that any subsequent
+        // readers will have this information.
+        //
+        // Since there may be garbage at the end of these "cleaned up"
+        // logs, we want to be sure to skip it here if we've read everything
+        // we were supposed to read out of the stream.
+        // So we force an EOF on all subsequent reads.
+        //
+        long skipAmt = file.length() - tracker.getPos();
+        if (skipAmt > 0) {
+          FSImage.LOG.warn("skipping " + skipAmt + " bytes at the end " +
+              "of edit log '" + getName() + "': reached txid " + txId +
+              " out of " + lastTxId);
+          tracker.skip(skipAmt);
+        }
+      }
+    }
+    return op;
   }
 
   @Override
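Note: the idea behind the nextOp() change can be sketched outside of HDFS as a small helper. This is an illustrative, simplified sketch only; the class, method, and parameter names below are made up for the example and are not part of the patch. Once the op carrying the segment's final transaction id has been read, any bytes left in the file are treated as finalization padding and skipped, so later reads see EOF instead of garbage.

import java.io.IOException;
import java.io.InputStream;

/** Illustrative sketch: force EOF once the final transaction has been read. */
class TrailingGarbageSkipper {
  /**
   * @param in        stream positioned just past the op that was read
   * @param fileLen   total length of the edit log file
   * @param pos       current position in the stream
   * @param txId      transaction id of the op that was just read
   * @param lastTxId  final transaction id this log should contain, or -1 if unknown
   */
  static void skipTrailingGarbage(InputStream in, long fileLen, long pos,
      long txId, long lastTxId) throws IOException {
    if (lastTxId != -1 && txId >= lastTxId) {
      long skipAmt = fileLen - pos;
      if (skipAmt > 0) {
        // Everything we were supposed to read has been consumed; whatever
        // remains is padding left behind by log finalization.
        long skipped = in.skip(skipAmt);
        if (skipped < skipAmt) {
          throw new IOException("could only skip " + skipped + " of "
              + skipAmt + " trailing bytes");
        }
      }
    }
  }
}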
@@ -41,12 +41,13 @@ import com.google.common.annotations.VisibleForTesting;
 @InterfaceAudience.Private
 public class EditLogFileOutputStream extends EditLogOutputStream {
   private static Log LOG = LogFactory.getLog(EditLogFileOutputStream.class);
+  public static final int PREALLOCATION_LENGTH = 1024 * 1024;
 
   private File file;
   private FileOutputStream fp; // file stream for storing edit logs
   private FileChannel fc; // channel of the file stream for sync
   private EditsDoubleBuffer doubleBuf;
-  static ByteBuffer fill = ByteBuffer.allocateDirect(1024 * 1024); // preallocation, 1MB
+  static ByteBuffer fill = ByteBuffer.allocateDirect(PREALLOCATION_LENGTH);
 
   static {
     fill.position(0);
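Note: extracting PREALLOCATION_LENGTH as a named constant mostly matters so the tests further down can assert on the expected file size. The preallocation mechanism itself, writing a reused direct buffer of padding through the file channel, can be sketched roughly as follows. This is a simplified illustration under assumed names, not the actual EditLogFileOutputStream code, and the file name is made up.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

/** Sketch: grow a file by a fixed preallocation chunk from a reused buffer. */
class PreallocationSketch {
  static final int PREALLOCATION_LENGTH = 1024 * 1024; // 1MB, as in the patch

  // One shared direct buffer of zero padding; rewound before each use.
  private static final ByteBuffer FILL =
      ByteBuffer.allocateDirect(PREALLOCATION_LENGTH);

  static void preallocate(FileChannel fc) throws IOException {
    FILL.position(0);
    fc.position(fc.size());
    while (FILL.hasRemaining()) {
      fc.write(FILL); // extends the file by up to 1MB of zeros
    }
  }

  public static void main(String[] args) throws IOException {
    try (RandomAccessFile raf = new RandomAccessFile("example-edits.bin", "rw")) {
      preallocate(raf.getChannel());
      System.out.println("file length: " + raf.length()); // >= 1MB
    }
  }
}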
@@ -146,7 +146,7 @@ public class FSEditLogLoader {
         check203UpgradeFailure(logVersion, e);
         String errorMessage =
           formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
-        FSImage.LOG.error(errorMessage);
+        FSImage.LOG.error(errorMessage, e);
         if (recovery == null) {
           // We will only try to skip over problematic opcodes when in
           // recovery mode.
@@ -732,29 +732,34 @@ public class FSEditLogLoader {
       super(is);
     }
 
+    @Override
     public int read() throws IOException {
       int ret = super.read();
       if (ret != -1) curPos++;
       return ret;
     }
 
+    @Override
     public int read(byte[] data) throws IOException {
       int ret = super.read(data);
       if (ret > 0) curPos += ret;
       return ret;
     }
 
+    @Override
     public int read(byte[] data, int offset, int length) throws IOException {
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }
 
+    @Override
     public void mark(int limit) {
       super.mark(limit);
       markPos = curPos;
     }
 
+    @Override
     public void reset() throws IOException {
       if (markPos == -1) {
         throw new IOException("Not marked!");
@@ -767,6 +772,13 @@ public class FSEditLogLoader {
     public long getPos() {
       return curPos;
     }
+
+    @Override
+    public long skip(long amt) throws IOException {
+      long ret = super.skip(amt);
+      curPos += ret;
+      return ret;
+    }
   }
 
   public long getLastAppliedTxId() {
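Note: the new @Override annotations and the skip() override on the position-tracking stream matter because FSEditLogLoader trusts getPos() for error reporting and for deciding how far the stream has advanced; any path that consumes bytes without updating curPos would silently desynchronize it. A self-contained sketch of the pattern (the class and field names here are illustrative, not the HDFS inner class):

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

/** Sketch of a position-tracking stream: every read and skip updates curPos. */
class PositionTrackingStream extends FilterInputStream {
  private long curPos = 0;
  private long markPos = -1;

  PositionTrackingStream(InputStream in) {
    super(in);
  }

  @Override
  public int read() throws IOException {
    int ret = super.read();
    if (ret != -1) curPos++;
    return ret;
  }

  @Override
  public int read(byte[] data, int offset, int length) throws IOException {
    int ret = super.read(data, offset, length);
    if (ret > 0) curPos += ret;
    return ret;
  }

  @Override
  public long skip(long amt) throws IOException {
    long ret = super.skip(amt);
    curPos += ret; // without this override, skipping would desync curPos
    return ret;
  }

  @Override
  public void mark(int limit) {
    super.mark(limit);
    markPos = curPos;
  }

  @Override
  public void reset() throws IOException {
    if (markPos == -1) {
      throw new IOException("Not marked!");
    }
    super.reset();
    curPos = markPos;
  }

  public long getPos() {
    return curPos;
  }
}

In this sketch only the three-argument read() is overridden, which keeps the count correct because FilterInputStream#read(byte[]) delegates to it.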
@@ -75,6 +75,7 @@ import java.io.EOFException;
 public abstract class FSEditLogOp {
   public final FSEditLogOpCodes opCode;
   long txid;
+  private static final int MAX_OP_SIZE = 100 * 1024 * 1024;
 
 
   @SuppressWarnings("deprecation")
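Note: MAX_OP_SIZE is used below as the mark/reset limit in readOp(). Marking with a fixed, generous bound is what lets the reader rewind after a failed decode even when an op spans more bytes than the stream happens to have buffered; in.available() only reports what is immediately readable, so it is not a safe limit. A small illustration of mark() with an explicit bound (the stream contents and sizes here are hypothetical):

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

/** Sketch: a mark() with an explicit, generous readlimit survives long reads. */
class MarkLimitSketch {
  static final int MAX_OP_SIZE = 100 * 1024 * 1024; // matches the new constant

  public static void main(String[] args) throws IOException {
    byte[] data = new byte[64 * 1024];
    InputStream in = new BufferedInputStream(new ByteArrayInputStream(data), 512);

    in.mark(MAX_OP_SIZE);         // valid as long as fewer than MAX_OP_SIZE bytes are read
    in.read(new byte[16 * 1024]); // read far past the 512-byte internal buffer
    in.reset();                   // still works: the mark limit was large enough
    System.out.println("reset ok, next byte: " + in.read());
  }
}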
@@ -2263,33 +2264,78 @@ public abstract class FSEditLogOp {
    *
    * @param skipBrokenEdits If true, attempt to skip over damaged parts of
    *     the input stream, rather than throwing an IOException
-   * @return the operation read from the stream, or null at the end of the file
-   * @throws IOException on error.
+   * @return the operation read from the stream, or null at the end of the
+   *     file
+   * @throws IOException on error.  This function should only throw an
+   *     exception when skipBrokenEdits is false.
    */
   public FSEditLogOp readOp(boolean skipBrokenEdits) throws IOException {
-    FSEditLogOp op = null;
     while (true) {
       try {
-        in.mark(in.available());
-        try {
-          op = decodeOp();
-        } finally {
-          // If we encountered an exception or an end-of-file condition,
-          // do not advance the input stream.
-          if (op == null) {
-            in.reset();
-          }
-        }
-        return op;
-      } catch (IOException e) {
+        in.mark(MAX_OP_SIZE);
+        return decodeOp();
+      } catch (GarbageAfterTerminatorException e) {
+        in.reset();
         if (!skipBrokenEdits) {
           throw e;
         }
+        // If we saw a terminator opcode followed by a long region of 0x00 or
+        // 0xff, we want to skip over that region, because there's nothing
+        // interesting there.
+        long numSkip = e.getNumAfterTerminator();
+        if (in.skip(numSkip) < numSkip) {
+          FSImage.LOG.error("Failed to skip " + numSkip + " bytes of " +
+            "garbage after an OP_INVALID. Unexpected early EOF.");
+          return null;
+        }
+      } catch (IOException e) {
+        in.reset();
+        if (!skipBrokenEdits) {
+          throw e;
+        }
+      } catch (RuntimeException e) {
+        // FSEditLogOp#decodeOp is not supposed to throw RuntimeException.
+        // However, we handle it here for recovery mode, just to be more
+        // robust.
+        in.reset();
+        if (!skipBrokenEdits) {
+          throw e;
+        }
+      } catch (Throwable e) {
+        in.reset();
+        if (!skipBrokenEdits) {
+          throw new IOException("got unexpected exception " +
+              e.getMessage(), e);
+        }
       }
+      // Move ahead one byte and re-try the decode process.
       if (in.skip(1) < 1) {
         return null;
       }
     }
   }
 
+  private void verifyTerminator() throws IOException {
+    long off = 0;
+    /** The end of the edit log should contain only 0x00 or 0xff bytes.
+     * If it contains other bytes, the log itself may be corrupt.
+     * It is important to check this; if we don't, a stray OP_INVALID byte
+     * could make us stop reading the edit log halfway through, and we'd never
+     * know that we had lost data.
+     */
+    byte[] buf = new byte[4096];
+    while (true) {
+      int numRead = in.read(buf);
+      if (numRead == -1) {
+        return;
+      }
+      for (int i = 0; i < numRead; i++, off++) {
+        if ((buf[i] != (byte)0) && (buf[i] != (byte)-1)) {
+          throw new GarbageAfterTerminatorException("Read garbage after " +
+            "the terminator!", off);
+        }
+      }
+    }
+  }
+
   private FSEditLogOp decodeOp() throws IOException {
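Note: the core of verifyTerminator() is a plain byte scan: after the OP_INVALID terminator, only 0x00 and 0xff padding may appear, and the offset of the first other byte is what GarbageAfterTerminatorException reports. The check can be illustrated in isolation like this (a standalone sketch with hypothetical names):

/** Sketch: scan the bytes after a terminator; only 0x00 and 0xff are allowed. */
class TerminatorCheck {
  /**
   * @return -1 if the tail is clean, otherwise the offset (relative to the
   *         start of the tail) of the first byte that is neither 0x00 nor 0xff
   */
  static long firstGarbageOffset(byte[] tail) {
    for (int i = 0; i < tail.length; i++) {
      if ((tail[i] != (byte) 0) && (tail[i] != (byte) -1)) {
        return i;
      }
    }
    return -1;
  }

  public static void main(String[] args) {
    byte[] clean = { 0, 0, (byte) 0xff, 0 };
    byte[] dirty = { 0, 0, 0x0d, 0x0e };          // stray bytes after padding
    System.out.println(firstGarbageOffset(clean)); // -1
    System.out.println(firstGarbageOffset(dirty)); //  2
  }
}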
@@ -2306,8 +2352,10 @@ public abstract class FSEditLogOp {
     }
 
     FSEditLogOpCodes opCode = FSEditLogOpCodes.fromByte(opCodeByte);
-    if (opCode == OP_INVALID)
+    if (opCode == OP_INVALID) {
+      verifyTerminator();
       return null;
+    }
 
     FSEditLogOp op = cache.get(opCode);
     if (op == null) {
@@ -2477,4 +2525,35 @@ public abstract class FSEditLogOp {
     short mode = Short.valueOf(st.getValue("MODE"));
     return new PermissionStatus(username, groupname, new FsPermission(mode));
   }
+
+  /**
+   * Exception indicating that we found an OP_INVALID followed by some
+   * garbage.  An OP_INVALID should signify the end of the file... if there
+   * is additional content after that, then the edit log is corrupt.
+   */
+  static class GarbageAfterTerminatorException extends IOException {
+    private static final long serialVersionUID = 1L;
+    private final long numAfterTerminator;
+
+    public GarbageAfterTerminatorException(String str,
+        long numAfterTerminator) {
+      super(str);
+      this.numAfterTerminator = numAfterTerminator;
+    }
+
+    /**
+     * Get the number of bytes after the terminator at which the garbage
+     * appeared.
+     *
+     * So if you had an OP_INVALID followed immediately by another valid opcode,
+     * this would be 0.
+     * If you had an OP_INVALID followed by some padding bytes, followed by a
+     * stray byte at the end, this would be the number of padding bytes.
+     *
+     * @return numAfterTerminator
+     */
+    public long getNumAfterTerminator() {
+      return numAfterTerminator;
+    }
+  }
 }
@@ -60,6 +60,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -536,6 +537,11 @@ public class FSImage implements Closeable {
     return editLog;
   }
 
+  @VisibleForTesting
+  void setEditLogForTesting(FSEditLog newLog) {
+    editLog = newLog;
+  }
+
   void openEditLogForWrite() throws IOException {
     assert editLog != null : "editLog must be initialized";
     editLog.openForWrite();
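Note: the @VisibleForTesting setter gives tests a seam for swapping in a Mockito spy of FSEditLog, which the new test further down uses to stop an edit log segment from being finalized. The general pattern, with hypothetical Service and Journal types standing in for FSImage and FSEditLog, looks roughly like this:

import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.spy;

/** Sketch of the test-only-setter plus spy pattern (hypothetical types). */
class SpyInjectionSketch {
  static class Journal {
    public void finalizeSegment() { /* writes a terminator, renames the file, ... */ }
  }

  static class Service {
    private Journal journal = new Journal();
    public Journal getJournal() { return journal; }
    // Visible for testing only: lets a test substitute an instrumented journal.
    public void setJournalForTesting(Journal j) { journal = j; }
    public void shutdown() { journal.finalizeSegment(); }
  }

  public static void main(String[] args) {
    Service service = new Service();
    Journal spyJournal = spy(service.getJournal());
    doNothing().when(spyJournal).finalizeSegment(); // leave the log "in progress"
    service.setJournalForTesting(spyJournal);
    service.shutdown(); // finalizeSegment() is now a no-op
  }
}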
@@ -22,6 +22,7 @@ import java.io.IOException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream;
 
 import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
@@ -48,7 +49,8 @@ interface OfflineEditsLoader {
       OfflineEditsLoader loader = null;
       try {
         file = new File(inputFileName);
-        elis = new EditLogFileInputStream(file, -1, -1, false);
+        elis = new EditLogFileInputStream(file, HdfsConstants.INVALID_TXID,
+            HdfsConstants.INVALID_TXID, false);
         loader = new OfflineEditsBinaryLoader(visitor, elis);
       } finally {
         if ((loader == null) && (elis != null)) {
@@ -528,7 +528,7 @@ public class TestEditLog extends TestCase {
     } catch (IOException e) {
       // expected
       assertEquals("Cause of exception should be ChecksumException",
-          e.getCause().getClass(), ChecksumException.class);
+          ChecksumException.class, e.getCause().getClass());
     }
   }
 
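Note: the argument swap above fixes the assertion's failure message rather than the check itself: JUnit's assertEquals takes an optional message, then the expected value, then the actual value, so reversing the last two makes a failure report the values backwards. A minimal illustration (hypothetical test, not part of the patch):

import static org.junit.Assert.assertEquals;

import org.junit.Test;

public class AssertOrderExample {
  @Test
  public void messageUsesExpectedThenActual() {
    Class<?> actual = RuntimeException.class;
    // Message first, then expected, then actual; swapping the last two only
    // garbles the failure text ("expected:<...> but was:<...>").
    assertEquals("Cause of exception should be RuntimeException",
        RuntimeException.class, actual);
  }
}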
@@ -68,7 +68,7 @@ public class TestEditLogFileOutputStream {
     assertEquals(1, validation.getNumTransactions());
     assertEquals("Edit log should have 1MB pre-allocated, plus 4 bytes " +
         "for the version number",
-        PREALLOCATION_LENGTH, editLog.length());
+        EditLogFileOutputStream.PREALLOCATION_LENGTH + 4, editLog.length());
 
 
     cluster.getFileSystem().mkdirs(new Path("/tmp"),
@@ -82,7 +82,7 @@ public class TestEditLogFileOutputStream {
     assertEquals(2, validation.getNumTransactions());
 
     assertEquals("Edit log should be 1MB long, plus 4 bytes for the version number",
-        PREALLOCATION_LENGTH, editLog.length());
+        EditLogFileOutputStream.PREALLOCATION_LENGTH + 4, editLog.length());
     // 256 blocks for the 1MB of preallocation space
     assertTrue("Edit log disk space used should be at least 257 blocks",
         256 * 4096 <= new DU(editLog, conf).getUsed());
@@ -25,6 +25,8 @@ import java.util.HashSet;
 import java.util.Set;
 
 import static org.junit.Assert.*;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.spy;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -37,7 +39,6 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache;
 import org.apache.hadoop.hdfs.server.namenode.FSImage;
-import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
@@ -214,12 +215,126 @@ public class TestNameNodeRecovery {
     runEditLogTest(new EltsTestGarbageInEditLog());
   }
 
-  /** Test that we can successfully recover from a situation where the last
-   * entry in the edit log has been truncated. */
-  @Test(timeout=180000)
-  public void testRecoverTruncatedEditLog() throws IOException {
+  /**
+   * An algorithm for corrupting an edit log.
+   */
+  static interface Corruptor {
+    /*
+     * Corrupt an edit log file.
+     *
+     * @param editFile   The edit log file
+     */
+    public void corrupt(File editFile) throws IOException;
+
+    /*
+     * Explain whether we need to read the log in recovery mode
+     *
+     * @param finalized  True if the edit log in question is finalized.
+     *                   We're a little more lax about reading unfinalized
+     *                   logs.  We will allow a small amount of garbage at
+     *                   the end.  In a finalized log, every byte must be
+     *                   perfect.
+     *
+     * @return           Whether we need to read the log in recovery mode
+     */
+    public boolean needRecovery(boolean finalized);
+
+    /*
+     * Get the name of this corruptor
+     *
+     * @return           The Corruptor name
+     */
+    public String getName();
+  }
+
+  static class TruncatingCorruptor implements Corruptor {
+    @Override
+    public void corrupt(File editFile) throws IOException {
+      // Corrupt the last edit
+      long fileLen = editFile.length();
+      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
+      rwf.setLength(fileLen - 1);
+      rwf.close();
+    }
+
+    @Override
+    public boolean needRecovery(boolean finalized) {
+      return finalized;
+    }
+
+    @Override
+    public String getName() {
+      return "truncated";
+    }
+  }
+
+  static class PaddingCorruptor implements Corruptor {
+    @Override
+    public void corrupt(File editFile) throws IOException {
+      // Add junk to the end of the file
+      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
+      rwf.seek(editFile.length());
+      for (int i = 0; i < 129; i++) {
+        rwf.write((byte)0);
+      }
+      rwf.write(0xd);
+      rwf.write(0xe);
+      rwf.write(0xa);
+      rwf.write(0xd);
+      rwf.close();
+    }
+
+    @Override
+    public boolean needRecovery(boolean finalized) {
+      // With finalized edit logs, we ignore what's at the end as long as we
+      // can make it to the correct transaction ID.
+      // With unfinalized edit logs, the finalization process ignores garbage
+      // at the end.
+      return false;
+    }
+
+    @Override
+    public String getName() {
+      return "padFatal";
+    }
+  }
+
+  static class SafePaddingCorruptor implements Corruptor {
+    private byte padByte;
+
+    public SafePaddingCorruptor(byte padByte) {
+      this.padByte = padByte;
+      assert ((this.padByte == 0) || (this.padByte == -1));
+    }
+
+    @Override
+    public void corrupt(File editFile) throws IOException {
+      // Add junk to the end of the file
+      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
+      rwf.seek(editFile.length());
+      rwf.write((byte)-1);
+      for (int i = 0; i < 1024; i++) {
+        rwf.write(padByte);
+      }
+      rwf.close();
+    }
+
+    @Override
+    public boolean needRecovery(boolean finalized) {
+      return false;
+    }
+
+    @Override
+    public String getName() {
+      return "pad" + ((int)padByte);
+    }
+  }
+
+  static void testNameNodeRecoveryImpl(Corruptor corruptor, boolean finalize)
+      throws IOException {
     final String TEST_PATH = "/test/path/dir";
     final int NUM_TEST_MKDIRS = 10;
+    final boolean needRecovery = corruptor.needRecovery(finalize);
 
     // start a cluster
     Configuration conf = new HdfsConfiguration();
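Note: the Corruptor interface is what makes this test parameterizable: each implementation pairs a way of damaging the edit log with a statement of whether recovery mode must be required, and a new scenario only needs another implementation plus a call to testNameNodeRecoveryImpl. As a purely hypothetical example (not part of the patch), a corruptor that flips a byte in the middle of the log, which should always force recovery, might look like this inside TestNameNodeRecovery:

  /** Hypothetical example (not in the patch): flip one byte in the middle
   *  of the log, which should always force recovery mode. */
  static class ByteFlippingCorruptor implements Corruptor {
    @Override
    public void corrupt(File editFile) throws IOException {
      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
      try {
        rwf.seek(editFile.length() / 2);
        int b = rwf.read();
        rwf.seek(editFile.length() / 2);
        rwf.write(b ^ 0xff); // invert the byte we just read
      } finally {
        rwf.close();
      }
    }

    @Override
    public boolean needRecovery(boolean finalized) {
      return true; // damage in the middle of the log is never tolerated
    }

    @Override
    public String getName() {
      return "byteFlip";
    }
  }

  // A test using it would mirror the existing ones:
  //   testNameNodeRecoveryImpl(new ByteFlippingCorruptor(), true);
  //   testNameNodeRecoveryImpl(new ByteFlippingCorruptor(), false);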
@@ -230,6 +345,15 @@
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
           .build();
       cluster.waitActive();
+      if (!finalize) {
+        // Normally, the in-progress edit log would be finalized by
+        // FSEditLog#endCurrentLogSegment.  For testing purposes, we
+        // disable that here.
+        FSEditLog spyLog =
+            spy(cluster.getNameNode().getFSImage().getEditLog());
+        doNothing().when(spyLog).endCurrentLogSegment(true);
+        cluster.getNameNode().getFSImage().setEditLogForTesting(spyLog);
+      }
       fileSys = cluster.getFileSystem();
       final FSNamesystem namesystem = cluster.getNamesystem();
       FSImage fsimage = namesystem.getFSImage();
@@ -246,13 +370,11 @@
     File editFile = FSImageTestUtil.findLatestEditsLog(sd).getFile();
     assertTrue("Should exist: " + editFile, editFile.exists());
 
-    // Corrupt the last edit
-    long fileLen = editFile.length();
-    RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
-    rwf.setLength(fileLen - 1);
-    rwf.close();
+    // Corrupt the edit log
+    corruptor.corrupt(editFile);
 
-    // Make sure that we can't start the cluster normally before recovery
+    // If needRecovery == true, make sure that we can't start the
+    // cluster normally before recovery
     cluster = null;
     try {
       LOG.debug("trying to start normally (this should fail)...");
@@ -260,16 +382,24 @@
           .format(false).build();
       cluster.waitActive();
       cluster.shutdown();
-      fail("expected the truncated edit log to prevent normal startup");
+      if (needRecovery) {
+        fail("expected the corrupted edit log to prevent normal startup");
+      }
     } catch (IOException e) {
-      // success
+      if (!needRecovery) {
+        LOG.error("Got unexpected failure with " + corruptor.getName() +
+            corruptor, e);
+        fail("got unexpected exception " + e.getMessage());
+      }
     } finally {
       if (cluster != null) {
         cluster.shutdown();
       }
     }
 
-    // Perform recovery
+    // Perform NameNode recovery.
+    // Even if there was nothing wrong previously (needRecovery == false),
+    // this should still work fine.
     cluster = null;
     try {
       LOG.debug("running recovery...");
@@ -291,8 +421,8 @@
       LOG.debug("starting cluster normally after recovery...");
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
           .format(false).build();
-      LOG.debug("testRecoverTruncatedEditLog: successfully recovered the " +
-          "truncated edit log");
+      LOG.debug("successfully recovered the " + corruptor.getName() +
+          " corrupted edit log");
       assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH)));
     } catch (IOException e) {
       fail("failed to recover. Error message: " + e.getMessage());
@@ -302,4 +432,36 @@
       }
     }
   }
+
+  /** Test that we can successfully recover from a situation where the last
+   * entry in the edit log has been truncated. */
+  @Test(timeout=180000)
+  public void testRecoverTruncatedEditLog() throws IOException {
+    testNameNodeRecoveryImpl(new TruncatingCorruptor(), true);
+    testNameNodeRecoveryImpl(new TruncatingCorruptor(), false);
+  }
+
+  /** Test that we can successfully recover from a situation where the last
+   * entry in the edit log has been padded with garbage. */
+  @Test(timeout=180000)
+  public void testRecoverPaddedEditLog() throws IOException {
+    testNameNodeRecoveryImpl(new PaddingCorruptor(), true);
+    testNameNodeRecoveryImpl(new PaddingCorruptor(), false);
+  }
+
+  /** Test that we don't need to recover from a situation where the last
+   * entry in the edit log has been padded with 0. */
+  @Test(timeout=180000)
+  public void testRecoverZeroPaddedEditLog() throws IOException {
+    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)0), true);
+    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)0), false);
+  }
+
+  /** Test that we don't need to recover from a situation where the last
+   * entry in the edit log has been padded with 0xff bytes. */
+  @Test(timeout=180000)
+  public void testRecoverNegativeOnePaddedEditLog() throws IOException {
+    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)-1), true);
+    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)-1), false);
+  }
 }