HDFS-13314. NameNode should optionally exit if it detects FsImage corruption. Contributed by Arpit Agarwal.

This commit is contained in:
Arpit Agarwal 2018-03-28 11:37:34 -07:00
parent cc0a791794
commit 7e5c8faeb7
3 changed files with 101 additions and 14 deletions

View File

@ -34,6 +34,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -67,6 +68,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.Canceler;
import org.apache.hadoop.hdfs.util.MD5FileUtils; import org.apache.hadoop.hdfs.util.MD5FileUtils;
import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
@ -85,6 +87,10 @@ public class FSImage implements Closeable {
protected FSEditLog editLog = null; protected FSEditLog editLog = null;
private boolean isUpgradeFinalized = false; private boolean isUpgradeFinalized = false;
// If true, then image corruption was detected. The NameNode process will
// exit immediately after saving the image.
private AtomicBoolean exitAfterSave = new AtomicBoolean(false);
protected NNStorage storage; protected NNStorage storage;
/** /**
@ -957,8 +963,14 @@ public class FSImage implements Closeable {
FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context);
FSImageCompression compression = FSImageCompression.createCompression(conf); FSImageCompression compression = FSImageCompression.createCompression(conf);
saver.save(newFile, compression); long numErrors = saver.save(newFile, compression);
if (numErrors > 0) {
// The image is likely corrupted.
LOG.error("Detected " + numErrors + " errors while saving FsImage " +
dstFile);
exitAfterSave.set(true);
}
MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest()); MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest());
storage.setMostRecentCheckpointInfo(txid, Time.now()); storage.setMostRecentCheckpointInfo(txid, Time.now());
} }
@ -1096,6 +1108,12 @@ public class FSImage implements Closeable {
} }
//Update NameDirSize Metric //Update NameDirSize Metric
getStorage().updateNameDirSize(); getStorage().updateNameDirSize();
if (exitAfterSave.get()) {
LOG.fatal("NameNode process will exit now... The saved FsImage " +
nnf + " is potentially corrupted.");
ExitUtil.terminate(-1);
}
} }
/** /**
@ -1163,8 +1181,11 @@ public class FSImage implements Closeable {
// Since we now have a new checkpoint, we can clean up some // Since we now have a new checkpoint, we can clean up some
// old edit logs and checkpoints. // old edit logs and checkpoints.
purgeOldStorage(nnf); // Do not purge anything if we just wrote a corrupted FsImage.
archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW); if (!exitAfterSave.get()) {
purgeOldStorage(nnf);
archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
}
} finally { } finally {
// Notify any threads waiting on the checkpoint to be canceled // Notify any threads waiting on the checkpoint to be canceled
// that it is complete. // that it is complete.

View File

@ -418,15 +418,22 @@ public final class FSImageFormatProtobuf {
sectionOutputStream.flush(); sectionOutputStream.flush();
} }
void save(File file, FSImageCompression compression) throws IOException { /**
* @return number of non-fatal errors detected while writing the image.
* @throws IOException on fatal error.
*/
long save(File file, FSImageCompression compression) throws IOException {
FileOutputStream fout = new FileOutputStream(file); FileOutputStream fout = new FileOutputStream(file);
fileChannel = fout.getChannel(); fileChannel = fout.getChannel();
try { try {
LOG.info("Saving image file {} using {}", file, compression); LOG.info("Saving image file {} using {}", file, compression);
long startTime = monotonicNow(); long startTime = monotonicNow();
saveInternal(fout, compression, file.getAbsolutePath()); long numErrors = saveInternal(
LOG.info("Image file {} of size {} bytes saved in {} seconds.", file, fout, compression, file.getAbsolutePath());
file.length(), (monotonicNow() - startTime) / 1000); LOG.info("Image file {} of size {} bytes saved in {} seconds {}.", file,
file.length(), (monotonicNow() - startTime) / 1000,
(numErrors > 0 ? (" with" + numErrors + " errors") : ""));
return numErrors;
} finally { } finally {
fout.close(); fout.close();
} }
@ -450,7 +457,11 @@ public final class FSImageFormatProtobuf {
saver.serializeFilesUCSection(sectionOutputStream); saver.serializeFilesUCSection(sectionOutputStream);
} }
private void saveSnapshots(FileSummary.Builder summary) throws IOException { /**
* @return number of non-fatal errors detected while saving the image.
* @throws IOException on fatal error.
*/
private long saveSnapshots(FileSummary.Builder summary) throws IOException {
FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver( FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver(
this, summary, context, context.getSourceNamesystem()); this, summary, context, context.getSourceNamesystem());
@ -461,9 +472,14 @@ public final class FSImageFormatProtobuf {
snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream); snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream);
} }
snapshotSaver.serializeINodeReferenceSection(sectionOutputStream); snapshotSaver.serializeINodeReferenceSection(sectionOutputStream);
return snapshotSaver.getNumImageErrors();
} }
private void saveInternal(FileOutputStream fout, /**
* @return number of non-fatal errors detected while writing the FsImage.
* @throws IOException on fatal error.
*/
private long saveInternal(FileOutputStream fout,
FSImageCompression compression, String filePath) throws IOException { FSImageCompression compression, String filePath) throws IOException {
StartupProgress prog = NameNode.getStartupProgress(); StartupProgress prog = NameNode.getStartupProgress();
MessageDigest digester = MD5Hash.getDigester(); MessageDigest digester = MD5Hash.getDigester();
@ -496,7 +512,7 @@ public final class FSImageFormatProtobuf {
Step step = new Step(StepType.INODES, filePath); Step step = new Step(StepType.INODES, filePath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step); prog.beginStep(Phase.SAVING_CHECKPOINT, step);
saveInodes(b); saveInodes(b);
saveSnapshots(b); long numErrors = saveSnapshots(b);
prog.endStep(Phase.SAVING_CHECKPOINT, step); prog.endStep(Phase.SAVING_CHECKPOINT, step);
step = new Step(StepType.DELEGATION_TOKENS, filePath); step = new Step(StepType.DELEGATION_TOKENS, filePath);
@ -519,6 +535,7 @@ public final class FSImageFormatProtobuf {
saveFileSummary(underlyingOutputStream, summary); saveFileSummary(underlyingOutputStream, summary);
underlyingOutputStream.close(); underlyingOutputStream.close();
savedDigest = new MD5Hash(digester.digest()); savedDigest = new MD5Hash(digester.digest());
return numErrors;
} }
private void saveSecretManagerSection(FileSummary.Builder summary) private void saveSecretManagerSection(FileSummary.Builder summary)

View File

@ -1,4 +1,4 @@
/** /**
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -27,6 +27,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
@ -48,6 +49,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.namenode.AclEntryStatusFormat; import org.apache.hadoop.hdfs.server.namenode.AclEntryStatusFormat;
import org.apache.hadoop.hdfs.server.namenode.AclFeature; import org.apache.hadoop.hdfs.server.namenode.AclFeature;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory; import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf; import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.LoaderContext; import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.LoaderContext;
@ -406,6 +408,7 @@ public class FSImageFormatPBSnapshot {
private final FileSummary.Builder headers; private final FileSummary.Builder headers;
private final FSImageFormatProtobuf.Saver parent; private final FSImageFormatProtobuf.Saver parent;
private final SaveNamespaceContext context; private final SaveNamespaceContext context;
private long numImageErrors;
public Saver(FSImageFormatProtobuf.Saver parent, public Saver(FSImageFormatProtobuf.Saver parent,
FileSummary.Builder headers, SaveNamespaceContext context, FileSummary.Builder headers, SaveNamespaceContext context,
@ -414,6 +417,7 @@ public class FSImageFormatPBSnapshot {
this.headers = headers; this.headers = headers;
this.context = context; this.context = context;
this.fsn = fsn; this.fsn = fsn;
this.numImageErrors = 0;
} }
/** /**
@ -462,15 +466,17 @@ public class FSImageFormatPBSnapshot {
throws IOException { throws IOException {
final List<INodeReference> refList = parent.getSaverContext() final List<INodeReference> refList = parent.getSaverContext()
.getRefList(); .getRefList();
long i = 0;
for (INodeReference ref : refList) { for (INodeReference ref : refList) {
INodeReferenceSection.INodeReference.Builder rb = buildINodeReference(ref); INodeReferenceSection.INodeReference.Builder rb =
buildINodeReference(ref, i++);
rb.build().writeDelimitedTo(out); rb.build().writeDelimitedTo(out);
} }
parent.commitSection(headers, SectionName.INODE_REFERENCE); parent.commitSection(headers, SectionName.INODE_REFERENCE);
} }
private INodeReferenceSection.INodeReference.Builder buildINodeReference( private INodeReferenceSection.INodeReference.Builder buildINodeReference(
INodeReference ref) throws IOException { final INodeReference ref, final long refIndex) throws IOException {
INodeReferenceSection.INodeReference.Builder rb = INodeReferenceSection.INodeReference.Builder rb =
INodeReferenceSection.INodeReference.newBuilder(). INodeReferenceSection.INodeReference.newBuilder().
setReferredId(ref.getId()); setReferredId(ref.getId());
@ -480,6 +486,16 @@ public class FSImageFormatPBSnapshot {
} else if (ref instanceof DstReference) { } else if (ref instanceof DstReference) {
rb.setDstSnapshotId(ref.getDstSnapshotId()); rb.setDstSnapshotId(ref.getDstSnapshotId());
} }
if (fsn.getFSDirectory().getInode(ref.getId()) == null) {
FSImage.LOG.error(
"FSImageFormatPBSnapshot: Missing referred INodeId " +
ref.getId() + " for INodeReference index " + refIndex +
"; path=" + ref.getFullPathName() +
"; parent=" + (ref.getParent() == null ? "null" :
ref.getParent().getFullPathName()));
++numImageErrors;
}
return rb; return rb;
} }
@ -575,7 +591,23 @@ public class FSImageFormatPBSnapshot {
.getList(ListType.CREATED); .getList(ListType.CREATED);
db.setCreatedListSize(created.size()); db.setCreatedListSize(created.size());
List<INode> deleted = diff.getChildrenDiff().getList(ListType.DELETED); List<INode> deleted = diff.getChildrenDiff().getList(ListType.DELETED);
INode previousNode = null;
boolean misordered = false;
for (INode d : deleted) { for (INode d : deleted) {
// getLocalNameBytes() may return null below, and that is okay.
final int result = previousNode == null ? -1 :
previousNode.compareTo(d.getLocalNameBytes());
if (result == 0) {
FSImage.LOG.error(
"Name '" + d.getLocalName() + "' is repeated in the " +
"'deleted' difflist of directory " +
dir.getFullPathName() + ", INodeId=" + dir.getId());
++numImageErrors;
} else if (result > 0 && !misordered) {
misordered = true;
++numImageErrors;
}
previousNode = d;
if (d.isReference()) { if (d.isReference()) {
refList.add(d.asReference()); refList.add(d.asReference());
db.addDeletedINodeRef(refList.size() - 1); db.addDeletedINodeRef(refList.size() - 1);
@ -583,11 +615,28 @@ public class FSImageFormatPBSnapshot {
db.addDeletedINode(d.getId()); db.addDeletedINode(d.getId());
} }
} }
if (misordered) {
FSImage.LOG.error(
"Misordered entries in the 'deleted' difflist of directory " +
dir.getFullPathName() + ", INodeId=" + dir.getId() +
". The full list is " +
Arrays.toString(deleted.toArray()));
}
db.build().writeDelimitedTo(out); db.build().writeDelimitedTo(out);
saveCreatedList(created, out); saveCreatedList(created, out);
} }
} }
} }
/**
 * Reports how many non-fatal problems this saver has counted so far while
 * writing the SnapshotDiff and INodeReference sections of the image.
 *
 * @return the accumulated count of non-fatal image errors.
 */
public long getNumImageErrors() {
  return this.numImageErrors;
}
} }
private FSImageFormatPBSnapshot(){} private FSImageFormatPBSnapshot(){}