From b38649c59a70b3112811443464b3b3180f4b9873 Mon Sep 17 00:00:00 2001
From: Wei-Chiu Chuang
Date: Mon, 13 Aug 2018 16:12:37 -0700
Subject: [PATCH] HDFS-13813. Exit NameNode if dangling child inode is detected
 when saving FsImage. Contributed by Siyao Meng.

(cherry picked from commit 23854443efa62aa70a1c30c32c3816750e5d7a5b)
---
Note (ignored by git am): the line structure of this patch was restored from
a whitespace-collapsed copy. Generic type arguments on three lines
(DeduplicationMap<String>, Iterator<INodeWithAdditionalFields>,
ArrayList<INodeReference>) were absent from that copy and have been re-added
on the assumption that they were stripped together with the author's e-mail
address; confirm the context lines against the target tree before applying.

 .../server/namenode/FSImageFormatPBINode.java | 28 +++++++++++++++++--
 .../namenode/FSImageFormatProtobuf.java       |  9 ++++--
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
index 5e60038ce7a..a233d2639be 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
@@ -439,6 +439,8 @@ public final class FSImageFormatPBINode {
   }
 
   public final static class Saver {
+    private long numImageErrors;
+
     private static long buildPermissionStatus(INodeAttributes n,
         final SaverContext.DeduplicationMap<String> stringMap) {
       long userId = stringMap.getId(n.getUserName());
@@ -563,11 +565,13 @@ public final class FSImageFormatPBINode {
       this.summary = summary;
       this.context = parent.getContext();
       this.fsn = context.getSourceNamesystem();
+      this.numImageErrors = 0;
     }
 
     void serializeINodeDirectorySection(OutputStream out) throws IOException {
-      Iterator<INodeWithAdditionalFields> iter = fsn.getFSDirectory()
-          .getINodeMap().getMapIterator();
+      FSDirectory dir = fsn.getFSDirectory();
+      Iterator<INodeWithAdditionalFields> iter = dir.getINodeMap()
+          .getMapIterator();
       final ArrayList<INodeReference> refList = parent.getSaverContext()
           .getRefList();
       int i = 0;
@@ -583,6 +587,17 @@ public final class FSImageFormatPBINode {
         INodeDirectorySection.DirEntry.Builder b = INodeDirectorySection.
             DirEntry.newBuilder().setParent(n.getId());
         for (INode inode : children) {
+          // Error if the child inode doesn't exist in inodeMap
+          if (dir.getInode(inode.getId()) == null) {
+            FSImage.LOG.error(
+                "FSImageFormatPBINode#serializeINodeDirectorySection: " +
+                    "Dangling child pointer found. Missing INode in " +
+                    "inodeMap: id=" + inode.getId() +
+                    "; path=" + inode.getFullPathName() +
+                    "; parent=" + (inode.getParent() == null ? "null" :
+                    inode.getParent().getFullPathName()));
+            ++numImageErrors;
+          }
           if (!inode.isReference()) {
             b.addChildren(inode.getId());
           } else {
@@ -711,6 +726,15 @@ public final class FSImageFormatPBINode {
           .setId(n.getId())
           .setName(ByteString.copyFrom(n.getLocalNameBytes()));
     }
+
+    /**
+     * Number of non-fatal errors detected while writing the
+     * INodeSection and INodeDirectorySection sections.
+     * @return the number of non-fatal errors detected.
+     */
+    public long getNumImageErrors() {
+      return numImageErrors;
+    }
   }
 
   private FSImageFormatPBINode() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
index 4ac20adc426..9752733c1c6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
@@ -474,13 +474,15 @@ public final class FSImageFormatProtobuf {
       out.write(lengthBytes);
     }
 
-    private void saveInodes(FileSummary.Builder summary) throws IOException {
+    private long saveInodes(FileSummary.Builder summary) throws IOException {
       FSImageFormatPBINode.Saver saver = new FSImageFormatPBINode.Saver(this,
           summary);
 
       saver.serializeINodeSection(sectionOutputStream);
       saver.serializeINodeDirectorySection(sectionOutputStream);
       saver.serializeFilesUCSection(sectionOutputStream);
+
+      return saver.getNumImageErrors();
     }
 
     /**
@@ -543,8 +545,9 @@ public final class FSImageFormatProtobuf {
 
       step = new Step(StepType.INODES, filePath);
       prog.beginStep(Phase.SAVING_CHECKPOINT, step);
-      saveInodes(b);
-      long numErrors = saveSnapshots(b);
+      // Count number of non-fatal errors when saving inodes and snapshots.
+      long numErrors = saveInodes(b);
+      numErrors += saveSnapshots(b);
       prog.endStep(Phase.SAVING_CHECKPOINT, step);
 
       step = new Step(StepType.DELEGATION_TOKENS, filePath);