HDFS-13314. NameNode should optionally exit if it detects FsImage corruption. Contributed by Arpit Agarwal.

This commit is contained in:
Arpit Agarwal 2018-03-28 11:37:34 -07:00
parent ac16e8f4d3
commit d54e19a913
3 changed files with 101 additions and 14 deletions

View File

@ -35,6 +35,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -68,6 +69,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.util.Canceler;
import org.apache.hadoop.hdfs.util.MD5FileUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Time;
import com.google.common.base.Joiner;
@ -86,6 +88,10 @@ public class FSImage implements Closeable {
protected FSEditLog editLog = null;
private boolean isUpgradeFinalized = false;
// If true, then image corruption was detected. The NameNode process will
// exit immediately after saving the image.
private AtomicBoolean exitAfterSave = new AtomicBoolean(false);
protected NNStorage storage;
/**
@ -954,8 +960,14 @@ public class FSImage implements Closeable {
FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context);
FSImageCompression compression = FSImageCompression.createCompression(conf);
saver.save(newFile, compression);
long numErrors = saver.save(newFile, compression);
if (numErrors > 0) {
// The image is likely corrupted.
LOG.error("Detected " + numErrors + " errors while saving FsImage " +
dstFile);
exitAfterSave.set(true);
}
MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest());
storage.setMostRecentCheckpointInfo(txid, Time.now());
}
@ -1117,6 +1129,12 @@ public class FSImage implements Closeable {
}
//Update NameDirSize Metric
getStorage().updateNameDirSize();
if (exitAfterSave.get()) {
LOG.fatal("NameNode process will exit now... The saved FsImage " +
nnf + " is potentially corrupted.");
ExitUtil.terminate(-1);
}
}
/**
@ -1184,8 +1202,11 @@ public class FSImage implements Closeable {
// Since we now have a new checkpoint, we can clean up some
// old edit logs and checkpoints.
purgeOldStorage(nnf);
archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
// Do not purge anything if we just wrote a corrupted FsImage.
if (!exitAfterSave.get()) {
purgeOldStorage(nnf);
archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
}
} finally {
// Notify any threads waiting on the checkpoint to be canceled
// that it is complete.

View File

@ -444,15 +444,22 @@ public final class FSImageFormatProtobuf {
sectionOutputStream.flush();
}
void save(File file, FSImageCompression compression) throws IOException {
/**
* @return number of non-fatal errors detected while writing the image.
* @throws IOException on fatal error.
*/
long save(File file, FSImageCompression compression) throws IOException {
FileOutputStream fout = new FileOutputStream(file);
fileChannel = fout.getChannel();
try {
LOG.info("Saving image file {} using {}", file, compression);
long startTime = monotonicNow();
saveInternal(fout, compression, file.getAbsolutePath());
LOG.info("Image file {} of size {} bytes saved in {} seconds.", file,
file.length(), (monotonicNow() - startTime) / 1000);
long numErrors = saveInternal(
fout, compression, file.getAbsolutePath());
LOG.info("Image file {} of size {} bytes saved in {} seconds {}.", file,
file.length(), (monotonicNow() - startTime) / 1000,
(numErrors > 0 ? (" with" + numErrors + " errors") : ""));
return numErrors;
} finally {
fout.close();
}
@ -476,7 +483,11 @@ public final class FSImageFormatProtobuf {
saver.serializeFilesUCSection(sectionOutputStream);
}
private void saveSnapshots(FileSummary.Builder summary) throws IOException {
/**
* @return number of non-fatal errors detected while saving the image.
* @throws IOException on fatal error.
*/
private long saveSnapshots(FileSummary.Builder summary) throws IOException {
FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver(
this, summary, context, context.getSourceNamesystem());
@ -487,9 +498,14 @@ public final class FSImageFormatProtobuf {
snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream);
}
snapshotSaver.serializeINodeReferenceSection(sectionOutputStream);
return snapshotSaver.getNumImageErrors();
}
private void saveInternal(FileOutputStream fout,
/**
* @return number of non-fatal errors detected while writing the FsImage.
* @throws IOException on fatal error.
*/
private long saveInternal(FileOutputStream fout,
FSImageCompression compression, String filePath) throws IOException {
StartupProgress prog = NameNode.getStartupProgress();
MessageDigest digester = MD5Hash.getDigester();
@ -528,7 +544,7 @@ public final class FSImageFormatProtobuf {
step = new Step(StepType.INODES, filePath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
saveInodes(b);
saveSnapshots(b);
long numErrors = saveSnapshots(b);
prog.endStep(Phase.SAVING_CHECKPOINT, step);
step = new Step(StepType.DELEGATION_TOKENS, filePath);
@ -551,6 +567,7 @@ public final class FSImageFormatProtobuf {
saveFileSummary(underlyingOutputStream, summary);
underlyingOutputStream.close();
savedDigest = new MD5Hash(digester.digest());
return numErrors;
}
private void saveSecretManagerSection(FileSummary.Builder summary)

View File

@ -1,4 +1,4 @@
/**
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -27,6 +27,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@ -49,6 +50,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.namenode.AclEntryStatusFormat;
import org.apache.hadoop.hdfs.server.namenode.AclFeature;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.LoaderContext;
@ -415,6 +417,7 @@ public class FSImageFormatPBSnapshot {
private final FileSummary.Builder headers;
private final FSImageFormatProtobuf.Saver parent;
private final SaveNamespaceContext context;
private long numImageErrors;
public Saver(FSImageFormatProtobuf.Saver parent,
FileSummary.Builder headers, SaveNamespaceContext context,
@ -423,6 +426,7 @@ public class FSImageFormatPBSnapshot {
this.headers = headers;
this.context = context;
this.fsn = fsn;
this.numImageErrors = 0;
}
/**
@ -471,15 +475,17 @@ public class FSImageFormatPBSnapshot {
throws IOException {
final List<INodeReference> refList = parent.getSaverContext()
.getRefList();
long i = 0;
for (INodeReference ref : refList) {
INodeReferenceSection.INodeReference.Builder rb = buildINodeReference(ref);
INodeReferenceSection.INodeReference.Builder rb =
buildINodeReference(ref, i++);
rb.build().writeDelimitedTo(out);
}
parent.commitSection(headers, SectionName.INODE_REFERENCE);
}
private INodeReferenceSection.INodeReference.Builder buildINodeReference(
INodeReference ref) throws IOException {
final INodeReference ref, final long refIndex) throws IOException {
INodeReferenceSection.INodeReference.Builder rb =
INodeReferenceSection.INodeReference.newBuilder().
setReferredId(ref.getId());
@ -489,6 +495,16 @@ public class FSImageFormatPBSnapshot {
} else if (ref instanceof DstReference) {
rb.setDstSnapshotId(ref.getDstSnapshotId());
}
if (fsn.getFSDirectory().getInode(ref.getId()) == null) {
FSImage.LOG.error(
"FSImageFormatPBSnapshot: Missing referred INodeId " +
ref.getId() + " for INodeReference index " + refIndex +
"; path=" + ref.getFullPathName() +
"; parent=" + (ref.getParent() == null ? "null" :
ref.getParent().getFullPathName()));
++numImageErrors;
}
return rb;
}
@ -583,7 +599,23 @@ public class FSImageFormatPBSnapshot {
List<INode> created = diff.getChildrenDiff().getCreatedUnmodifiable();
db.setCreatedListSize(created.size());
List<INode> deleted = diff.getChildrenDiff().getDeletedUnmodifiable();
INode previousNode = null;
boolean misordered = false;
for (INode d : deleted) {
// getBytes() may return null below, and that is okay.
final int result = previousNode == null ? -1 :
previousNode.compareTo(d.getLocalNameBytes());
if (result == 0) {
FSImage.LOG.error(
"Name '" + d.getLocalName() + "' is repeated in the " +
"'deleted' difflist of directory " +
dir.getFullPathName() + ", INodeId=" + dir.getId());
++numImageErrors;
} else if (result > 0 && !misordered) {
misordered = true;
++numImageErrors;
}
previousNode = d;
if (d.isReference()) {
refList.add(d.asReference());
db.addDeletedINodeRef(refList.size() - 1);
@ -591,11 +623,28 @@ public class FSImageFormatPBSnapshot {
db.addDeletedINode(d.getId());
}
}
if (misordered) {
FSImage.LOG.error(
"Misordered entries in the 'deleted' difflist of directory " +
dir.getFullPathName() + ", INodeId=" + dir.getId() +
". The full list is " +
Arrays.toString(deleted.toArray()));
}
db.build().writeDelimitedTo(out);
saveCreatedList(created, out);
}
}
}
/**
* Number of non-fatal errors detected while writing the
* SnapshotDiff and INodeReference sections.
* @return the number of non-fatal errors detected.
*/
public long getNumImageErrors() {
return numImageErrors;
}
}
private FSImageFormatPBSnapshot(){}