HDFS-13818. Extend OIV to detect FSImage corruption. Contributed by Adam Antal.

Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
2018-12-03 10:33:51 -08:00 · 2018-12-03 10:33:51 -08:00 · fb10803dfa
commit fb10803dfa
parent dd5e7c6b72
12 changed files with 1052 additions and 73 deletions
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
@ -79,6 +79,12 @@ public class OfflineImageViewerPB {
      + "    to both inodes and inodes-under-construction, separated by a\n"
      + "    delimiter. The default delimiter is \\t, though this may be\n"
      + "    changed via the -delimiter argument.\n"
+      + "  * DetectCorruption: Detect potential corruption of the image by\n"
+      + "    selectively loading parts of it and actively searching for\n"
+      + "    inconsistencies. Outputs a summary of the found corruptions\n"
+      + "    in a delimited format.\n"
+      + "    Note that the check is not exhaustive, and only catches\n"
+      + "    missing nodes during the namespace reconstruction.\n"
      + "\n"
      + "Required command line arguments:\n"
      + "-i,--inputFile <arg>   FSImage or XML file to process.\n"
@ -91,12 +97,15 @@ public class OfflineImageViewerPB {
      + "                       will also create an <outputFile>.md5 file.\n"
      + "-p,--processor <arg>   Select which type of processor to apply\n"
      + "                       against image file. (XML|FileDistribution|\n"
-      + "                       ReverseXML|Web|Delimited)\n"
+      + "                       ReverseXML|Web|Delimited|DetectCorruption)\n"
      + "                       The default is Web.\n"
-      + "-delimiter <arg>       Delimiting string to use with Delimited processor.  \n"
-      + "-t,--temp <arg>        Use temporary dir to cache intermediate result to generate\n"
-      + "                       Delimited outputs. If not set, Delimited processor constructs\n"
-      + "                       the namespace in memory before outputting text.\n"
+      + "-delimiter <arg>       Delimiting string to use with Delimited or \n"
+      + "                       DetectCorruption processor. \n"
+      + "-t,--temp <arg>        Use temporary dir to cache intermediate\n"
+      + "                       result to generate DetectCorruption or\n"
+      + "                       Delimited outputs. If not set, the processor\n"
+      + "                       constructs the namespace in memory \n"
+      + "                       before outputting text.\n"
      + "-h,--help              Display usage information and exit\n";

  /**
@ -172,7 +181,7 @@ public static int run(String[] args) throws Exception {
    String processor = cmd.getOptionValue("p", "Web");
    String outputFile = cmd.getOptionValue("o", "-");
    String delimiter = cmd.getOptionValue("delimiter",
-        PBImageDelimitedTextWriter.DEFAULT_DELIMITER);
+        PBImageTextWriter.DEFAULT_DELIMITER);
    String tempPath = cmd.getOptionValue("t", "");

    Configuration conf = new Configuration();
@ -219,6 +228,12 @@ public static int run(String[] args) throws Exception {
          writer.visit(r);
        }
        break;
+      case "DETECTCORRUPTION":
+        try (PBImageCorruptionDetector detector =
+            new PBImageCorruptionDetector(out, delimiter, tempPath)) {
+          detector.visit(new RandomAccessFile(inputFile, "r"));
+        }
+        break;
      default:
        System.err.println("Invalid processor specified : " + processor);
        printUsage();
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruption.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruption.java
@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.util.EnumSet;
+
+/**
+ * Describes a single corruption found by the PBImageCorruptionDetector
+ * processor: the id of the affected inode, the kinds of corruption seen
+ * for it, and the number of its children that are corrupt.
+ */
+public class PBImageCorruption {
+  /** Joins the two type names when both kinds of corruption are present. */
+  private static final String WITH = "With";
+
+  /**
+   * PBImageCorruptionType maps each kind of corruption to the string used
+   * for it in the output. More constants can be added when other kinds of
+   * corruption are identified. Currently the relevant cases are a corrupt
+   * node and a node with missing children.
+   */
+  private enum PBImageCorruptionType {
+    CORRUPT_NODE("CorruptNode"),
+    MISSING_CHILD("MissingChild");
+
+    private final String name;
+
+    PBImageCorruptionType(String s) {
+      name = s;
+    }
+
+    @Override
+    public String toString() {
+      return this.name;
+    }
+  }
+
+  /** Id of the inode this corruption belongs to. */
+  private final long id;
+  /** Kinds of corruption recorded for the inode; never empty. */
+  private final EnumSet<PBImageCorruptionType> type;
+  /** Number of children of the inode that are corrupt. */
+  private int numOfCorruptChildren;
+
+  /**
+   * Creates a corruption record for the given inode.
+   *
+   * @param id id of the affected inode
+   * @param missingChild whether the inode has missing children
+   * @param corruptNode whether the inode itself is corrupt
+   * @param numOfCorruptChildren number of corrupt children of the inode
+   * @throws IllegalArgumentException if neither missingChild nor
+   *         corruptNode is set
+   */
+  PBImageCorruption(long id, boolean missingChild, boolean corruptNode,
+                    int numOfCorruptChildren) {
+    if (!missingChild && !corruptNode) {
+      throw new IllegalArgumentException(
+          "Corruption must have at least one aspect!");
+    }
+    this.id = id;
+    this.type = EnumSet.noneOf(PBImageCorruptionType.class);
+    if (missingChild) {
+      type.add(PBImageCorruptionType.MISSING_CHILD);
+    }
+    if (corruptNode) {
+      type.add(PBImageCorruptionType.CORRUPT_NODE);
+    }
+    this.numOfCorruptChildren = numOfCorruptChildren;
+  }
+
+  /** Additionally marks the inode as having missing children. */
+  void addMissingChildCorruption() {
+    type.add(PBImageCorruptionType.MISSING_CHILD);
+  }
+
+  /** Additionally marks the inode itself as corrupt. */
+  void addCorruptNodeCorruption() {
+    type.add(PBImageCorruptionType.CORRUPT_NODE);
+  }
+
+  /** Overwrites the number of corrupt children of the inode. */
+  void setNumberOfCorruption(int numOfCorruption) {
+    this.numOfCorruptChildren = numOfCorruption;
+  }
+
+  long getId() {
+    return id;
+  }
+
+  /**
+   * Returns the textual form of the recorded corruption kinds:
+   * "CorruptNode", "MissingChild" or "CorruptNodeWithMissingChild".
+   */
+  String getType() {
+    boolean corruptNode = type.contains(PBImageCorruptionType.CORRUPT_NODE);
+    boolean missingChild = type.contains(PBImageCorruptionType.MISSING_CHILD);
+
+    // A local, single-threaded buffer does not need StringBuffer's locking.
+    StringBuilder s = new StringBuilder();
+    if (corruptNode) {
+      s.append(PBImageCorruptionType.CORRUPT_NODE);
+    }
+    if (corruptNode && missingChild) {
+      s.append(WITH);
+    }
+    if (missingChild) {
+      s.append(PBImageCorruptionType.MISSING_CHILD);
+    }
+    return s.toString();
+  }
+
+  int getNumOfCorruptChildren() {
+    return numOfCorruptChildren;
+  }
+
+}
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruptionDetector.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruptionDetector.java
@ -0,0 +1,344 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+/**
+ * The PBImageCorruptionDetector detects corruptions in the image.
+ * It produces a file with the found issues similar to the Delimited
+ * processor. The default delimiter is tab, as this is an unlikely value
+ * to be included in an inode path. The delimiter value can be changed
+ * via the constructor.
+ *
+ * It looks for the following kinds of corruptions:
+ *  - an INode id is mentioned in the INodeDirectorySection, but not present
+ *    in the INodeSection (corrupt INode case)
+ *  - an INode has children, but at least one of them is corrupted
+ *    (missing children case)
+ * If multiple layers of directory structure are damaged then it is possible
+ * that an INode is corrupted and also has corrupted children.
+ *
+ * Note that the OIV DetectCorruption processor check is not exhaustive,
+ * and only catches corruptions like the ones above. This processor may be
+ * extended in the future when new kinds of corruption are found.
+ */
+public class PBImageCorruptionDetector extends PBImageTextWriter {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(PBImageCorruptionDetector.class);
+
+  /**
+   * Builder object for producing entries (lines) for
+   * PBImageCorruptionDetector. The isSnapshot field is mandatory, and a
+   * corruption must be set before build() is called. The other fields
+   * default to the empty string; an unset parent id is printed as
+   * "Missing".
+   */
+  static class OutputEntryBuilder {
+    private static final String MISSING = "Missing";
+
+    private final PBImageCorruptionDetector corrDetector;
+    private final boolean isSnapshot;
+    private PBImageCorruption corruption;
+    private String parentPath;
+    private long parentId;
+    private String name;
+    private String nodeType;
+
+    OutputEntryBuilder(PBImageCorruptionDetector corrDetector,
+        boolean isSnapshot) {
+      this.corrDetector = corrDetector;
+      this.isSnapshot = isSnapshot;
+      this.parentId = -1;
+      this.parentPath = "";
+      this.name = "";
+      this.nodeType = "";
+    }
+
+    OutputEntryBuilder setCorruption(PBImageCorruption corr) {
+      this.corruption = corr;
+      return this;
+    }
+
+    OutputEntryBuilder setParentPath(String path) {
+      this.parentPath = path;
+      return this;
+    }
+
+    OutputEntryBuilder setParentId(long id) {
+      this.parentId = id;
+      return this;
+    }
+
+    OutputEntryBuilder setName(String n) {
+      this.name = n;
+      return this;
+    }
+
+    OutputEntryBuilder setNodeType(String nType) {
+      this.nodeType = nType;
+      return this;
+    }
+
+    /**
+     * Builds the output line; the columns follow the order of the header
+     * returned by getHeader().
+     */
+    public String build() {
+      // StringBuffer is required by the append helpers of the detector.
+      StringBuffer buffer = new StringBuffer();
+      buffer.append(corruption.getType());
+      corrDetector.append(buffer, corruption.getId());
+      corrDetector.append(buffer, String.valueOf(isSnapshot));
+      corrDetector.append(buffer, parentPath);
+      if (parentId == -1) {
+        corrDetector.append(buffer, MISSING);
+      } else {
+        corrDetector.append(buffer, parentId);
+      }
+      corrDetector.append(buffer, name);
+      corrDetector.append(buffer, nodeType);
+      corrDetector.append(buffer, corruption.getNumOfCorruptChildren());
+      return buffer.toString();
+    }
+  }
+
+  /**
+   * Collects the ids of the known INodes and INodeReferences so that ids
+   * mentioned elsewhere in the image can be checked against them.
+   */
+  private static class CorruptionChecker {
+    private static final String NODE_TYPE = "Node";
+    private static final String REF_TYPE = "Ref";
+    private static final String UNKNOWN_TYPE = "Unknown";
+
+    /** Contains all existing INode IDs. */
+    private final Set<Long> nodeIds;
+    /** Contains all existing INodeReference IDs. */
+    private Set<Long> nodeRefIds;
+
+    CorruptionChecker() {
+      nodeIds = new HashSet<>();
+      // Start empty so lookups made before saveNodeRefIds() do not fail
+      // with a NullPointerException.
+      nodeRefIds = new HashSet<>();
+    }
+
+    /**
+     * Collect a INode Id.
+     *
+     * @throws IllegalStateException if the id was already collected
+     */
+    void saveNodeId(long id) {
+      Preconditions.checkState(nodeIds != null && !nodeIds.contains(id));
+      nodeIds.add(id);
+    }
+
+    /**
+     * Returns whether the given INode id was saved previously.
+     */
+    boolean isNodeIdExist(long id) {
+      return nodeIds.contains(id);
+    }
+
+    /**
+     * Returns whether the given INodeReference id was saved previously.
+     */
+    boolean isNodeRefIdExist(long id) {
+      return nodeRefIds.contains(id);
+    }
+
+    /**
+     * Saves the INodeReference ids, replacing any previously saved ones.
+     */
+    void saveNodeRefIds(List<Long> nodeRefIdList) {
+      nodeRefIds = new HashSet<>(nodeRefIdList);
+    }
+
+    /**
+     * Returns "Node" for a known INode id, "Ref" for a known
+     * INodeReference id and "Unknown" otherwise.
+     */
+    String getTypeOfId(long id) {
+      if (isNodeIdExist(id)) {
+        return NODE_TYPE;
+      } else if (isNodeRefIdExist(id)) {
+        return REF_TYPE;
+      } else {
+        return UNKNOWN_TYPE;
+      }
+    }
+  }
+
+  /** Keeps track of the known INode and INodeReference ids. */
+  private final CorruptionChecker corrChecker;
+  /** Id to corruption mapping. */
+  private final Map<Long, PBImageCorruption> corruptionsMap;
+
+  PBImageCorruptionDetector(PrintStream out, String delimiter,
+        String tempPath) throws IOException {
+    super(out, delimiter, tempPath);
+    corrChecker = new CorruptionChecker();
+    corruptionsMap = new TreeMap<>();
+  }
+
+  @Override
+  public String getHeader() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append("CorruptionType");
+    append(buffer, "Id");
+    append(buffer, "IsSnapshot");
+    append(buffer, "ParentPath");
+    append(buffer, "ParentId");
+    append(buffer, "Name");
+    append(buffer, "NodeType");
+    append(buffer, "CorruptChildren");
+    return buffer.toString();
+  }
+
+  /**
+   * Returns the output line of the given inode if a corruption was recorded
+   * for it, and the empty string otherwise. A reported corruption is removed
+   * from the map, so afterOutput() only prints the remaining ones.
+   */
+  @Override
+  public String getEntry(String parentPath,
+      FsImageProto.INodeSection.INode inode) {
+    long id = inode.getId();
+    PBImageCorruption corruption = corruptionsMap.get(id);
+    if (corruption == null) {
+      return "";
+    }
+    long parentId = -1;
+    try {
+      parentId = getParentId(id);
+    } catch (IOException ignored) {
+      // Best effort: an unknown parent id is printed as "Missing".
+    }
+    OutputEntryBuilder entryBuilder = new OutputEntryBuilder(this, false)
+        .setCorruption(corruption)
+        .setParentPath(parentPath)
+        .setParentId(parentId)
+        .setName(inode.getName().toStringUtf8())
+        .setNodeType(corrChecker.getTypeOfId(id));
+    corruptionsMap.remove(id);
+    return entryBuilder.build();
+  }
+
+  @Override
+  protected void checkNode(FsImageProto.INodeSection.INode p,
+        AtomicInteger numDirs) throws IOException {
+    super.checkNode(p, numDirs);
+    corrChecker.saveNodeId(p.getId());
+  }
+
+  /** Records that the inode with the given id is corrupt. */
+  private void addCorruptedNode(long childId) {
+    PBImageCorruption c = corruptionsMap.get(childId);
+    if (c == null) {
+      corruptionsMap.put(childId,
+          new PBImageCorruption(childId, false, true, 0));
+    } else {
+      c.addCorruptNodeCorruption();
+    }
+  }
+
+  /** Records that the inode with the given id has corrupt children. */
+  private void addCorruptedParent(long id, int numOfCorruption) {
+    PBImageCorruption c = corruptionsMap.get(id);
+    if (c == null) {
+      corruptionsMap.put(id,
+          new PBImageCorruption(id, true, false, numOfCorruption));
+    } else {
+      c.addMissingChildCorruption();
+      c.setNumberOfCorruption(numOfCorruption);
+    }
+  }
+
+  /**
+   * Scan the INodeDirectory section to construct the namespace, recording
+   * every parent or child id that is not among the known INode ids.
+   */
+  @Override
+  protected void buildNamespace(InputStream in, List<Long> refIdList)
+      throws IOException {
+    corrChecker.saveNodeRefIds(refIdList);
+    LOG.debug("Saved INodeReference ids of size {}.", refIdList.size());
+    int count = 0;
+    while (true) {
+      FsImageProto.INodeDirectorySection.DirEntry e =
+          FsImageProto.INodeDirectorySection.DirEntry.parseDelimitedFrom(in);
+      if (e == null) {
+        break;
+      }
+      count++;
+      if (LOG.isDebugEnabled() && count % 10000 == 0) {
+        LOG.debug("Scanned {} directories.", count);
+      }
+      long parentId = e.getParent();
+      if (!corrChecker.isNodeIdExist(parentId)) {
+        LOG.debug("Corruption detected! Parent node is not contained " +
+            "in the list of known ids!");
+        addCorruptedNode(parentId);
+      }
+      int numOfCorruption = 0;
+      for (int i = 0; i < e.getChildrenCount(); i++) {
+        long childId = e.getChildren(i);
+        putDirChildToMetadataMap(parentId, childId);
+        if (!corrChecker.isNodeIdExist(childId)) {
+          addCorruptedNode(childId);
+          numOfCorruption++;
+        }
+      }
+      if (numOfCorruption > 0) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("{} corruption detected! Child nodes are missing.",
+              numOfCorruption);
+        }
+        addCorruptedParent(parentId, numOfCorruption);
+      }
+      for (int i = 0; i < e.getRefChildrenCount(); i++) {
+        int refId = e.getRefChildren(i);
+        // In this case the refNode is referred directly (by its position),
+        // so we couldn't make sure of the correctness
+        putDirChildToMetadataMap(parentId, refIdList.get(refId));
+      }
+    }
+    LOG.info("Scanned {} INode directories to build namespace.", count);
+  }
+
+  /**
+   * Prints the corruptions that were not reported through getEntry(). They
+   * are printed without a parent path and with the IsSnapshot column set to
+   * true.
+   */
+  @Override
+  public void afterOutput() throws IOException {
+    if (corruptionsMap.isEmpty()) {
+      return;
+    }
+    // Also write out corruptions when the path could not be decided
+    LOG.info("Outputting {} more corrupted nodes.", corruptionsMap.size());
+    for (PBImageCorruption c : corruptionsMap.values()) {
+      long id = c.getId();
+      String name = "";
+      long parentId = -1;
+      try {
+        name = getNodeName(id);
+      } catch (IgnoreSnapshotException ignored) {
+        // The name is unknown, the column stays empty.
+      }
+      try {
+        parentId = getParentId(id);
+      } catch (IgnoreSnapshotException ignored) {
+        // The parent is unknown, it is printed as "Missing".
+      }
+      OutputEntryBuilder entryBuilder = new OutputEntryBuilder(this, true)
+          .setCorruption(c)
+          .setParentId(parentId)
+          .setName(name)
+          .setNodeType(corrChecker.getTypeOfId(id));
+      printIfNotEmpty(entryBuilder.build());
+    }
+  }
+}
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
@ -17,8 +17,6 @@
 */
 package org.apache.hadoop.hdfs.tools.offlineImageViewer;

-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.text.StringEscapeUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
@ -46,48 +44,19 @@
 * constructor.
 */
 public class PBImageDelimitedTextWriter extends PBImageTextWriter {
-  static final String DEFAULT_DELIMITER = "\t";
  private static final String DATE_FORMAT="yyyy-MM-dd HH:mm";
  private final SimpleDateFormat dateFormatter =
      new SimpleDateFormat(DATE_FORMAT);

-  private final String delimiter;
-
  PBImageDelimitedTextWriter(PrintStream out, String delimiter, String tempPath)
      throws IOException {
-    super(out, tempPath);
-    this.delimiter = delimiter;
+    super(out, delimiter, tempPath);
  }

  private String formatDate(long date) {
    return dateFormatter.format(new Date(date));
  }

-  private void append(StringBuffer buffer, int field) {
-    buffer.append(delimiter);
-    buffer.append(field);
-  }
-
-  private void append(StringBuffer buffer, long field) {
-    buffer.append(delimiter);
-    buffer.append(field);
-  }
-
-  static final String CRLF = StringUtils.CR + StringUtils.LF;
-
-  private void append(StringBuffer buffer, String field) {
-    buffer.append(delimiter);
-
-    String escapedField = StringEscapeUtils.escapeCsv(field);
-    if (escapedField.contains(CRLF)) {
-      escapedField = escapedField.replace(CRLF, "%x0D%x0A");
-    } else if (escapedField.contains(StringUtils.LF)) {
-      escapedField = escapedField.replace(StringUtils.LF, "%x0A");
-    }
-
-    buffer.append(escapedField);
-  }
-
  @Override
  public String getEntry(String parent, INode inode) {
    StringBuffer buffer = new StringBuffer();
@ -168,4 +137,9 @@ public String getHeader() {
    append(buffer, "GroupName");
    return buffer.toString();
  }
+
+  @Override
+  public void afterOutput() {
+    // Intentionally empty: this writer prints nothing more once the
+    // inodes have been output.
+  }
 }
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
@ -20,6 +20,8 @@
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.StringEscapeUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.PermissionStatus;
@ -56,6 +58,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
@ -88,6 +91,9 @@ abstract class PBImageTextWriter implements Closeable {
  private static final Logger LOG =
      LoggerFactory.getLogger(PBImageTextWriter.class);

+  static final String DEFAULT_DELIMITER = "\t";
+  static final String CRLF = StringUtils.CR + StringUtils.LF;
+
  /**
   * This metadata map is used to construct the namespace before generating
   * text outputs.
@ -114,6 +120,15 @@ private static interface MetadataMap extends Closeable {

    /** Synchronize metadata to persistent storage, if possible */
    public void sync() throws IOException;
+
+    /** Returns the name of inode. */
+    String getName(long id) throws IOException;
+
+    /**
+     * Returns the id of the parent's inode, if mentioned in
+     * INodeDirectorySection, throws IgnoreSnapshotException otherwise.
+     */
+    long getParentId(long id) throws IOException;
  }

  /**
@ -142,18 +157,32 @@ private void setParent(Dir parent) {
      /**
       * Returns the full path of this directory.
       */
-      private String getPath() {
+      String getPath() throws IgnoreSnapshotException {
        if (this.parent == null) {
-          return "/";
+          if (this.inode == INodeId.ROOT_INODE_ID) {
+            return "/";
+          } else {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Not root inode with id {} having no parent.", inode);
+            }
+            throw PBImageTextWriter.createIgnoredSnapshotException(inode);
+          }
        }
        if (this.path == null) {
          this.path = new Path(parent.getPath(), name.isEmpty() ? "/" : name).
              toString();
-          this.name = null;
        }
        return this.path;
      }

+      /**
+       * Returns the name of this directory. This implementation never
+       * throws; the exception is declared so that a subclass can refuse
+       * to provide a name.
+       */
+      String getName() throws IgnoreSnapshotException {
+        return name;
+      }
+
+      /** Returns the inode id of this directory. */
+      long getId() {
+        return inode;
+      }
+
      @Override
      public boolean equals(Object o) {
        return o instanceof Dir && inode == ((Dir) o).inode;
@ -165,6 +194,29 @@ public int hashCode() {
      }
    }

+    /**
+     * Placeholder for a Dir whose inode was not contained in the
+     * INodeSection. It is created with a null name and only knows its
+     * inode id: getPath() and getName() always throw
+     * IgnoreSnapshotException.
+     * Having such an entry makes sure that the getId and getParentId
+     * calls succeed whenever the information is available.
+     */
+    private static class CorruptedDir extends Dir {
+      CorruptedDir(long inode) {
+        super(inode, null);
+      }
+
+      @Override
+      String getPath() throws IgnoreSnapshotException {
+        throw PBImageTextWriter.createIgnoredSnapshotException(getId());
+      }
+
+      @Override
+      String getName() throws IgnoreSnapshotException {
+        throw PBImageTextWriter.createIgnoredSnapshotException(getId());
+      }
+    }
+
    /** INode Id to Dir object mapping */
    private Map<Long, Dir> dirMap = new HashMap<>();

@ -178,13 +230,20 @@ public int hashCode() {
    public void close() throws IOException {
    }

+    /**
+     * Returns the Dir registered for the given id. If there is none, a
+     * CorruptedDir is created for the id, registered and returned.
+     */
+    private Dir getOrCreateCorrupted(long id) {
+      Dir known = dirMap.get(id);
+      if (known != null) {
+        return known;
+      }
+      Dir placeholder = new CorruptedDir(id);
+      dirMap.put(id, placeholder);
+      return placeholder;
+    }
+
    @Override
    public void putDirChild(long parentId, long childId) {
-      Dir parent = dirMap.get(parentId);
-      Dir child = dirMap.get(childId);
-      if (child != null) {
-        child.setParent(parent);
-      }
+      Dir parent = getOrCreateCorrupted(parentId);
+      Dir child = getOrCreateCorrupted(childId);
+      child.setParent(parent);
      Preconditions.checkState(!dirChildMap.containsKey(childId));
      dirChildMap.put(childId, parent);
    }
@ -199,13 +258,13 @@ public void putDir(INode p) {
    @Override
    public String getParentPath(long inode) throws IOException {
      if (inode == INodeId.ROOT_INODE_ID) {
-        return "";
+        return "/";
      }
      Dir parent = dirChildMap.get(inode);
      if (parent == null) {
        // The inode is an INodeReference, which is generated from snapshot.
        // For delimited oiv tool, no need to print out metadata in snapshots.
-        PBImageTextWriter.ignoreSnapshotName(inode);
+        throw PBImageTextWriter.createIgnoredSnapshotException(inode);
      }
      return parent.getPath();
    }
@ -213,6 +272,24 @@ public String getParentPath(long inode) throws IOException {
    @Override
    public void sync() {
    }
+
+    @Override
+    public String getName(long id) throws IgnoreSnapshotException {
+      // Ids without a Dir entry are reported like ignored snapshot inodes.
+      Dir entry = dirMap.get(id);
+      if (entry == null) {
+        throw PBImageTextWriter.createIgnoredSnapshotException(id);
+      }
+      return entry.getName();
+    }
+
+    @Override
+    public long getParentId(long id) throws IgnoreSnapshotException {
+      // Only ids registered as a child of some directory have a parent.
+      Dir parent = dirChildMap.get(id);
+      if (parent == null) {
+        throw PBImageTextWriter.createIgnoredSnapshotException(id);
+      }
+      return parent.getId();
+    }
  }

  /**
@ -353,33 +430,37 @@ public void putDir(INode dir) throws IOException {
      dirMap.put(toBytes(dir.getId()), toBytes(dir.getName().toStringUtf8()));
    }

-    @Override
-    public String getParentPath(long inode) throws IOException {
-      if (inode == INodeId.ROOT_INODE_ID) {
-        return "/";
-      }
+    private long getFromDirChildMap(long inode) throws IOException {
      byte[] bytes = dirChildMap.get(toBytes(inode));
      if (bytes == null) {
        // The inode is an INodeReference, which is generated from snapshot.
        // For delimited oiv tool, no need to print out metadata in snapshots.
-        PBImageTextWriter.ignoreSnapshotName(inode);
+        throw PBImageTextWriter.createIgnoredSnapshotException(inode);
      }
      if (bytes.length != 8) {
        throw new IOException(
            "bytes array length error. Actual length is " + bytes.length);
      }
-      long parent = toLong(bytes);
+      return toLong(bytes);
+    }
+
+    @Override
+    public String getParentPath(long inode) throws IOException {
+      if (inode == INodeId.ROOT_INODE_ID) {
+        return "/";
+      }
+      long parent = getFromDirChildMap(inode);
      if (!dirPathCache.containsKey(parent)) {
-        bytes = dirMap.get(toBytes(parent));
+        byte[] bytes = dirMap.get(toBytes(parent));
        if (parent != INodeId.ROOT_INODE_ID && bytes == null) {
          // The parent is an INodeReference, which is generated from snapshot.
          // For delimited oiv tool, no need to print out metadata in snapshots.
-          PBImageTextWriter.ignoreSnapshotName(parent);
+          throw PBImageTextWriter.createIgnoredSnapshotException(inode);
        }
        String parentName = toString(bytes);
        String parentPath =
            new Path(getParentPath(parent),
-                parentName.isEmpty()? "/" : parentName).toString();
+                parentName.isEmpty() ? "/" : parentName).toString();
        dirPathCache.put(parent, parentPath);
      }
      return dirPathCache.get(parent);
@ -390,11 +471,26 @@ public void sync() throws IOException {
      dirChildMap.sync();
      dirMap.sync();
    }
+
+    @Override
+    public String getName(long id) throws IOException {
+      // A missing dirMap entry is reported like an ignored snapshot inode.
+      byte[] rawName = dirMap.get(toBytes(id));
+      if (rawName == null) {
+        throw PBImageTextWriter.createIgnoredSnapshotException(id);
+      }
+      return toString(rawName);
+    }
+
+    @Override
+    public long getParentId(long id) throws IOException {
+      // The parent id is whatever dirChildMap stores for the given id.
+      return getFromDirChildMap(id);
+    }
  }

  private SerialNumberManager.StringTable stringTable;
  private PrintStream out;
  private MetadataMap metadataMap = null;
+  private String delimiter;

  /**
   * Construct a PB FsImage writer to generate text file.
@ -402,8 +498,10 @@ public void sync() throws IOException {
   * @param tempPath the path to store metadata. If it is empty, store metadata
   *                 in memory instead.
   */
-  PBImageTextWriter(PrintStream out, String tempPath) throws IOException {
+  PBImageTextWriter(PrintStream out, String delimiter, String tempPath)
+      throws IOException {
    this.out = out;
+    this.delimiter = delimiter;
    if (tempPath.isEmpty()) {
      metadataMap = new InMemoryMetadataDB();
    } else {
@ -417,6 +515,29 @@ public void close() throws IOException {
    IOUtils.cleanup(null, metadataMap);
  }

+  /** Appends the delimiter followed by the given int field. */
+  void append(StringBuffer buffer, int field) {
+    buffer.append(delimiter).append(field);
+  }
+
+  /** Appends the delimiter followed by the given long field. */
+  void append(StringBuffer buffer, long field) {
+    buffer.append(delimiter).append(field);
+  }
+
+  /**
+   * Appends the delimiter followed by the given string field. The field is
+   * CSV-escaped, and its line breaks are replaced so that an entry always
+   * stays on a single output line: CRLF becomes %x0D%x0A and LF becomes
+   * %x0A.
+   */
+  void append(StringBuffer buffer, String field) {
+    buffer.append(delimiter);
+
+    // Replace the CRLF pairs first, then every LF that is still left. The
+    // second step must not be skipped when a CRLF was found, otherwise a
+    // field with both kinds of line break would keep a raw LF.
+    String escapedField = StringEscapeUtils.escapeCsv(field)
+        .replace(CRLF, "%x0D%x0A")
+        .replace(StringUtils.LF, "%x0A");
+
+    buffer.append(escapedField);
+  }
+
  /**
   * Get text output for the given inode.
   * @param parent the path of parent directory
@ -429,6 +550,13 @@ public void close() throws IOException {
   */
  abstract protected String getHeader();

+  /**
+   * Method called at the end of the output() phase, after all the inodes
+   * with a known parentPath have been printed out. Can be used to print
+   * additional data depending on the written inodes.
+   */
+  abstract protected void afterOutput() throws IOException;
+
  public void visit(RandomAccessFile file) throws IOException {
    Configuration conf = new Configuration();
    if (!FSImageUtil.checkFileFormat(file)) {
@ -489,6 +617,19 @@ public int compare(FsImageProto.FileSummary.Section s1,
    }
  }

+  /**
+   * Forwards a parent/child id pair to the metadata map's putDirChild.
+   *
+   * @param parentId id passed as the parent
+   * @param childId id passed as the child
+   */
+  void putDirChildToMetadataMap(long parentId, long childId)
+      throws IOException {
+    this.metadataMap.putDirChild(parentId, childId);
+  }
+
+  /**
+   * Looks up the name stored in the metadata map for the given id.
+   *
+   * @param id the id to look up
+   * @return whatever the metadata map's getName returns for the id
+   */
+  String getNodeName(long id) throws IOException {
+    return this.metadataMap.getName(id);
+  }
+
+  /**
+   * Looks up the parent id stored in the metadata map for the given id.
+   *
+   * @param id the id to look up
+   * @return whatever the metadata map's getParentId returns for the id
+   */
+  long getParentId(long id) throws IOException {
+    return this.metadataMap.getParentId(id);
+  }
+
  private void output(Configuration conf, FileSummary summary,
      FileInputStream fin, ArrayList<FileSummary.Section> sections)
      throws IOException {
@ -504,6 +645,7 @@ private void output(Configuration conf, FileSummary summary,
        outputINodes(is);
      }
    }
+    afterOutput();
    long timeTaken = Time.monotonicNow() - startTime;
    LOG.debug("Time to output inodes: {}ms", timeTaken);
  }
@ -553,22 +695,31 @@ private void loadINodeDirSection(
    LOG.info("Finished loading INode directory section in {}ms", timeTaken);
  }

+  /**
+   * Inspects one inode read from the INode section: a directory inode is
+   * stored in the metadata map and counted in numDirs, any other inode is
+   * left untouched. Subclasses may override this to perform additional
+   * steps for each inode while the INodeSection is iterated.
+   */
+  protected void checkNode(INode p, AtomicInteger numDirs) throws IOException {
+    if (!p.hasDirectory()) {
+      return;
+    }
+    metadataMap.putDir(p);
+    numDirs.incrementAndGet();
+  }
+
  /**
   * Load the filenames of the directories from the INode section.
   */
-  private void loadDirectoriesInINodeSection(InputStream in) throws IOException {
+  private void loadDirectoriesInINodeSection(InputStream in)
+      throws IOException {
    INodeSection s = INodeSection.parseDelimitedFrom(in);
    LOG.info("Loading directories in INode section.");
-    int numDirs = 0;
+    AtomicInteger numDirs = new AtomicInteger(0);
    for (int i = 0; i < s.getNumInodes(); ++i) {
      INode p = INode.parseDelimitedFrom(in);
      if (LOG.isDebugEnabled() && i % 10000 == 0) {
        LOG.debug("Scanned {} inodes.", i);
      }
-      if (p.hasDirectory()) {
-        metadataMap.putDir(p);
-        numDirs++;
-      }
+      checkNode(p, numDirs);
    }
    LOG.info("Found {} directories in INode section.", numDirs);
  }
@ -576,7 +727,7 @@ private void loadDirectoriesInINodeSection(InputStream in) throws IOException {
  /**
   * Scan the INodeDirectory section to construct the namespace.
   */
-  private void buildNamespace(InputStream in, List<Long> refIdList)
+  protected void buildNamespace(InputStream in, List<Long> refIdList)
      throws IOException {
    int count = 0;
    while (true) {
@ -603,6 +754,12 @@ private void buildNamespace(InputStream in, List<Long> refIdList)
    LOG.info("Scanned {} INode directories to build namespace.", count);
  }

+  /**
+   * Prints the line to the output stream, unless the line is empty.
+   *
+   * @param line the text to print; nothing is written for an empty string
+   */
+  void printIfNotEmpty(String line) {
+    if (line.isEmpty()) {
+      return;
+    }
+    out.println(line);
+  }
+
  private void outputINodes(InputStream in) throws IOException {
    INodeSection s = INodeSection.parseDelimitedFrom(in);
    LOG.info("Found {} INodes in the INode section", s.getNumInodes());
@ -612,7 +769,7 @@ private void outputINodes(InputStream in) throws IOException {
      INode p = INode.parseDelimitedFrom(in);
      try {
        String parentPath = metadataMap.getParentPath(p.getId());
-        out.println(getEntry(parentPath, p));
+        printIfNotEmpty(getEntry(parentPath, p));
      } catch (IOException ioe) {
        ignored++;
        if (!(ioe instanceof IgnoreSnapshotException)) {
@ -636,11 +793,12 @@ private void outputINodes(InputStream in) throws IOException {
    LOG.info("Outputted {} INodes.", s.getNumInodes());
  }

-  static void ignoreSnapshotName(long inode) throws IOException {
+  private static IgnoreSnapshotException createIgnoredSnapshotException(
+      long inode) {
    // Ignore snapshots - we want the output similar to -ls -R.
    if (LOG.isDebugEnabled()) {
      LOG.debug("No snapshot name found for inode {}", inode);
    }
-    throw new IgnoreSnapshotException();
+    return new IgnoreSnapshotException();
  }
 }
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsImageViewer.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsImageViewer.md
@ -55,7 +55,13 @@ The Offline Image Viewer provides several output processors:
   delimiter. The default delimiter is \t, though this may be changed via
   the -delimiter argument.

-5. ReverseXML (experimental): This is the opposite of the XML processor;
+5. DetectCorruption (experimental): Detect potential corruption of the image
+   by selectively loading parts of it and actively searching for
+   inconsistencies. Outputs a summary of the found corruptions
+   in a delimited format. Note that the check is not exhaustive,
+   and only catches missing nodes during the namespace reconstruction.
+
+6. ReverseXML (experimental): This is the opposite of the XML processor;
   it reconstructs an fsimage from an XML file. This processor makes it easy to
   create fsimages for testing, and manually edit fsimages when there is
   corruption.
@ -197,6 +203,30 @@ If not set, Delimited processor will construct the namespace in memory before ou
       /dir0/file1	1	2017-02-13 10:39	2017-02-13 10:39	134217728	1	1	0	0	-rw-r--r--	root	supergroup
       /dir0/file2	1	2017-02-13 10:39	2017-02-13 10:39	134217728	1	1	0	0	-rw-r--r--	root	supergroup

+### DetectCorruption Processor
+
+DetectCorruption processor generates a text representation of the errors of the fsimage, if there are any. It reports the following cases:
+
+1.  an inode is mentioned in the fsimage but no associated metadata is found (CorruptNode)
+
+2.  an inode has at least one corrupt child (MissingChild)
+
+The delimiter string can be provided with the -delimiter option, and the processor can cache intermediate results using the -t option.
+
+        bash$ bin/hdfs oiv -p DetectCorruption -delimiter delimiterString -t temporaryDir -i fsimage -o output
+
+The output of this processor is empty if no corruption is found; otherwise, it lists the found entries in the following format:
+
+        CorruptionType	Id	IsSnapshot	ParentPath	ParentId	Name	NodeType	CorruptChildren
+        MissingChild	16385	false	/	Missing		Node	1
+        MissingChild	16386	false	/	16385	dir0	Node	2
+        CorruptNode	16388	true		16386		Unknown	0
+        CorruptNode	16389	true		16386		Unknown	0
+        CorruptNodeWithMissingChild	16391	true		16385		Unknown	1
+        CorruptNode	16394	true		16391		Unknown	0
+
+The column CorruptionType can be MissingChild, CorruptNode or the combination of these two (CorruptNodeWithMissingChild). IsSnapshot shows whether the node is kept in a snapshot or not. The NodeType column contains Node, Ref or Unknown, depending on whether the node is an inode, a reference, or is corrupted and thus unknown. CorruptChildren contains the number of corrupt children of the inode.
+
 Options
 -------

@ -204,12 +234,12 @@ Options
 |:---- |:---- |
 | `-i`\|`--inputFile` *input file* | Specify the input fsimage file (or XML file, if ReverseXML processor is used) to process. Required. |
 | `-o`\|`--outputFile` *output file* | Specify the output filename, if the specified output processor generates one. If the specified file already exists, it is silently overwritten. (output to stdout by default) If the input file is an XML file, it also creates an &lt;outputFile&gt;.md5. |
-| `-p`\|`--processor` *processor* | Specify the image processor to apply against the image file. Currently valid options are `Web` (default), `XML`, `Delimited`, `FileDistribution` and `ReverseXML`. |
+| `-p`\|`--processor` *processor* | Specify the image processor to apply against the image file. Currently valid options are `Web` (default), `XML`, `Delimited`, `DetectCorruption`, `FileDistribution` and `ReverseXML`. |
 | `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. |
 | `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. |
 | `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. |
 | `-format` | Format the output result in a human-readable fashion rather than a number of bytes. (false by default). This option is used with FileDistribution processor. |
-| `-delimiter` *arg* | Delimiting string to use with Delimited processor. |
+| `-delimiter` *arg* | Delimiting string to use with Delimited or DetectCorruption processor. |
 | `-t`\|`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. |
 | `-h`\|`--help` | Display the tool usage and help information and exit. |

--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
@ -43,6 +43,7 @@
 import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;

 import java.io.BufferedReader;
@ -51,6 +52,7 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.PrintStream;
@ -60,11 +62,13 @@
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
@ -76,6 +80,10 @@
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;

 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.output.NullOutputStream;
@ -134,6 +142,10 @@ public class TestOfflineImageViewer {
  private static File originalFsimage = null;
  private static int filesECCount = 0;
  private static String addedErasureCodingPolicyName = null;
+  private static final long FILE_NODE_ID_1 = 16388;
+  private static final long FILE_NODE_ID_2 = 16389;
+  private static final long FILE_NODE_ID_3 = 16394;
+  private static final long DIR_NODE_ID = 16391;

  // namespace as written to dfs, to be compared with viewer's output
  final static HashMap<String, FileStatus> writtenFiles = Maps.newHashMap();
@ -635,6 +647,39 @@ public void testPBDelimitedWriter() throws IOException, InterruptedException {
        new FileSystemTestHelper().getTestRootDir() + "/delimited.db");
  }

+  /**
+   * Builds two output entries with different delimiters and field subsets
+   * and checks the exact text produced by OutputEntryBuilder.build().
+   */
+  @Test
+  public void testOutputEntryBuilder() throws IOException {
+    PBImageCorruptionDetector corrDetector =
+        new PBImageCorruptionDetector(null, ",", "");
+    PBImageCorruption c1 = new PBImageCorruption(342, true, false, 3);
+    PBImageCorruptionDetector.OutputEntryBuilder entryBuilder1 =
+        new PBImageCorruptionDetector.OutputEntryBuilder(corrDetector, false);
+    entryBuilder1.setParentId(1)
+        .setCorruption(c1)
+        .setParentPath("/dir1/dir2/");
+    // assertEquals takes (expected, actual); keeping that order makes the
+    // failure message report the right side as the expectation.
+    assertEquals("MissingChild,342,false,/dir1/dir2/,1,,,3",
+        entryBuilder1.build());
+
+    corrDetector = new PBImageCorruptionDetector(null, "\t", "");
+    PBImageCorruption c2 = new PBImageCorruption(781, false, true, 0);
+    PBImageCorruptionDetector.OutputEntryBuilder entryBuilder2 =
+        new PBImageCorruptionDetector.OutputEntryBuilder(corrDetector, true);
+    entryBuilder2.setParentPath("/dir3/")
+        .setCorruption(c2)
+        .setName("folder")
+        .setNodeType("Node");
+    assertEquals("CorruptNode\t781\ttrue\t/dir3/\tMissing\tfolder\tNode\t0",
+        entryBuilder2.build());
+  }
+
+  /**
+   * Runs the corruption detector check twice: once with an empty metadata
+   * path (in memory db) and once with a path under the test root dir.
+   */
+  @Test
+  public void testPBCorruptionDetector() throws IOException,
+      InterruptedException {
+    // Test in memory db.
+    testPBCorruptionDetector("");
+    final String dbPath =
+        new FileSystemTestHelper().getTestRootDir() + "/corruption.db";
+    testPBCorruptionDetector(dbPath);
+  }
+
  @Test
  public void testInvalidProcessorOption() throws Exception {
    int status =
@ -720,7 +765,7 @@ private void testPBDelimitedWriter(String db)
    }

    // writtenFiles does not contain root directory and "invalid XML char" dir.
-    for (Iterator<String> it = fileNames.iterator(); it.hasNext(); ) {
+    for (Iterator<String> it = fileNames.iterator(); it.hasNext();) {
      String filename = it.next();
      if (filename.startsWith("/dirContainingInvalidXMLChar")) {
        it.remove();
@ -731,6 +776,178 @@ private void testPBDelimitedWriter(String db)
    assertEquals(writtenFiles.keySet(), fileNames);
  }

+  /**
+   * Runs the corruption detector on the original fsimage and verifies that
+   * the output consists of the header line only.
+   *
+   * @param db metadata path handed to the detector; the caller passes an
+   *           empty string for the in memory db case
+   */
+  private void testPBCorruptionDetector(String db)
+      throws IOException, InterruptedException {
+    final String delimiter = "\t";
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+
+    // The image file is a resource too: close it together with the stream.
+    try (PrintStream o = new PrintStream(output);
+        RandomAccessFile image = new RandomAccessFile(originalFsimage, "r")) {
+      PBImageCorruptionDetector v =
+          new PBImageCorruptionDetector(o, delimiter, db);
+      v.visit(image);
+    }
+
+    try (
+        ByteArrayInputStream input =
+            new ByteArrayInputStream(output.toByteArray());
+        BufferedReader reader =
+            new BufferedReader(new InputStreamReader(input))) {
+      String line = reader.readLine();
+      System.out.println(line);
+      // Fail with an assertion instead of a NullPointerException when the
+      // detector wrote nothing at all.
+      assertTrue("Expected a header line in the detector output",
+          line != null);
+      String[] fields = line.split(delimiter);
+      assertEquals(8, fields.length);
+      PBImageCorruptionDetector v =
+          new PBImageCorruptionDetector(null, delimiter, "");
+      assertEquals(line, v.getHeader());
+      line = reader.readLine();
+      assertNull(line);
+    }
+  }
+
+  /**
+   * Removes inodes from the image XML document. For each given id, the
+   * first "id" element whose text equals the id is located and its parent
+   * element is removed from the document. Afterwards the value of the
+   * single "numInodes" element is decreased by the number of given ids.
+   *
+   * @param idsToDelete ids of the inodes to remove
+   * @param doc the parsed image XML, modified in place
+   * @throws IOException if the document has no "id" element, if one of the
+   *                     ids is not found, or if the document does not
+   *                     contain exactly one "numInodes" element
+   */
+  private void properINodeDelete(List<Long> idsToDelete, Document doc)
+      throws IOException {
+    NodeList inodes = doc.getElementsByTagName("id");
+    if (inodes.getLength() < 1) {
+      throw new IOException("No id tags found in the image xml.");
+    }
+    for (long idToDelete : idsToDelete) {
+      final String idText = String.valueOf(idToDelete);
+      boolean found = false;
+      for (int i = 0; i < inodes.getLength(); i++) {
+        Node id = inodes.item(i);
+        if (id.getTextContent().equals(idText)) {
+          found = true;
+          Node inode = id.getParentNode();
+          Node inodeSection = inode.getParentNode();
+          inodeSection.removeChild(inode);
+          break;
+        }
+      }
+      if (!found) {
+        throw new IOException(
+            "Couldn't find the id " + idToDelete + " in the image.");
+      }
+    }
+    NodeList numInodesNodes = doc.getElementsByTagName("numInodes");
+    if (numInodesNodes.getLength() != 1) {
+      // Covers both the missing and the duplicated tag case.
+      throw new IOException("Expected exactly one numInodes tag but found "
+          + numInodesNodes.getLength() + ".");
+    }
+    Node numInodesNode = numInodesNodes.item(0);
+    int numberOfINodes = Integer.parseInt(numInodesNode.getTextContent());
+    numberOfINodes -= idsToDelete.size();
+    numInodesNode.setTextContent(String.valueOf(numberOfINodes));
+  }
+
+  /**
+   * Parses the input XML file, removes the inodes with the given ids via
+   * properINodeDelete and writes the resulting document to the output file.
+   *
+   * @param inputFile the image XML to read
+   * @param outputFile the file the modified XML is written to
+   * @param corruptibleIds ids of the inodes to remove
+   */
+  private void deleteINodeFromXML(File inputFile, File outputFile,
+      List<Long> corruptibleIds) throws Exception {
+    Document doc = DocumentBuilderFactory.newInstance()
+        .newDocumentBuilder()
+        .parse(inputFile);
+
+    properINodeDelete(corruptibleIds, doc);
+
+    TransformerFactory.newInstance()
+        .newTransformer()
+        .transform(new DOMSource(doc), new StreamResult(outputFile));
+  }
+
+  /**
+   * Produces an fsimage with missing inodes in three steps: the original
+   * fsimage is dumped with the XML processor, the inodes with the given ids
+   * are deleted from that XML, and the edited XML is turned back into an
+   * image with the ReverseXML processor.
+   *
+   * @param goodImageXml file receiving the XML dump of the original image
+   * @param corruptedImageXml file receiving the XML after the deletion
+   * @param corruptedImage file receiving the image built from that XML
+   * @param corruptibleIds ids of the inodes to delete
+   * @throws IOException if either viewer run returns a non-zero status
+   */
+  private void generateMissingNodeCorruption(File goodImageXml,
+      File corruptedImageXml, File corruptedImage, List<Long> corruptibleIds)
+      throws Exception {
+    final String[] toXmlArgs = {"-p", "XML",
+        "-i", originalFsimage.getAbsolutePath(),
+        "-o", goodImageXml.getAbsolutePath()};
+    final int toXmlStatus = OfflineImageViewerPB.run(toXmlArgs);
+    if (toXmlStatus != 0) {
+      throw new IOException("Couldn't create XML!");
+    }
+
+    deleteINodeFromXML(goodImageXml, corruptedImageXml, corruptibleIds);
+
+    final String[] fromXmlArgs = {"-p", "ReverseXML",
+        "-i", corruptedImageXml.getAbsolutePath(),
+        "-o", corruptedImage.getAbsolutePath()};
+    final int fromXmlStatus = OfflineImageViewerPB.run(fromXmlArgs);
+    if (fromXmlStatus != 0) {
+      throw new IOException("Couldn't create from XML!");
+    }
+  }
+
+  /**
+   * Creates an fsimage from which the given inodes were removed, runs the
+   * corruption detector on it with "," as delimiter and returns everything
+   * the detector printed.
+   *
+   * @param runNumber suffix that keeps the files of each run apart
+   * @param corruptions ids of the inodes removed from the image
+   * @param db metadata path handed to the detector; callers pass an empty
+   *           string for the in memory case
+   * @return the detector output as a string
+   */
+  private String testCorruptionDetectorRun(int runNumber,
+      List<Long> corruptions, String db) throws Exception {
+    File goodImageXml = new File(tempDir, "goodImage" + runNumber + ".xml");
+    File corruptedImageXml = new File(tempDir,
+        "corruptedImage" + runNumber + ".xml");
+    File corruptedImage = new File(originalFsimage.getParent(),
+        "fsimage_corrupted" + runNumber);
+    generateMissingNodeCorruption(goodImageXml, corruptedImageXml,
+        corruptedImage, corruptions);
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+    // The image file is a resource too: close it together with the stream.
+    try (PrintStream o = new PrintStream(output);
+        RandomAccessFile image = new RandomAccessFile(corruptedImage, "r")) {
+      PBImageCorruptionDetector v =
+          new PBImageCorruptionDetector(o, ",", db);
+      v.visit(image);
+    }
+    return output.toString();
+  }
+
+  /**
+   * Reads an expected-output file from the directory named by the
+   * "test.cache.data" system property ("build/test/cache" if unset).
+   * Every line is trimmed; empty lines and lines starting with "#" are
+   * dropped, and each remaining line is terminated with "\n".
+   *
+   * @param fileName name of the file inside that directory
+   * @return the filtered file content
+   * @throws IOException if the file cannot be read
+   */
+  private String readExpectedFile(String fileName) throws IOException {
+    File file = new File(System.getProperty(
+        "test.cache.data", "build/test/cache"), fileName);
+    StringBuilder s = new StringBuilder();
+    // try-with-resources so the file handle is released even on failure.
+    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        line = line.trim();
+        if (line.length() <= 0 || line.startsWith("#")) {
+          continue;
+        }
+        s.append(line);
+        s.append("\n");
+      }
+    }
+    return s.toString();
+  }
+
+  /**
+   * Removes one file inode and compares the detector output with
+   * testSingleFileCorruption.csv, first with an empty metadata path and
+   * then with a path under the test root dir.
+   */
+  @Test
+  public void testCorruptionDetectionSingleFileCorruption() throws Exception {
+    final List<Long> removedIds = Collections.singletonList(FILE_NODE_ID_1);
+    final String inMemoryResult = testCorruptionDetectorRun(1, removedIds, "");
+    final String expected = readExpectedFile("testSingleFileCorruption.csv");
+    assertEquals(expected, inMemoryResult);
+
+    final String dbPath =
+        new FileSystemTestHelper().getTestRootDir() + "/corruption2.db";
+    final String dbResult = testCorruptionDetectorRun(2, removedIds, dbPath);
+    assertEquals(expected, dbResult);
+  }
+
+  /**
+   * Removes three file inodes and compares the detector output with
+   * testMultipleFileCorruption.csv, first with an empty metadata path and
+   * then with a path under the test root dir.
+   */
+  @Test
+  public void testCorruptionDetectionMultipleFileCorruption() throws Exception {
+    final List<Long> removedIds = Arrays.asList(FILE_NODE_ID_1, FILE_NODE_ID_2,
+        FILE_NODE_ID_3);
+    final String inMemoryResult = testCorruptionDetectorRun(3, removedIds, "");
+    final String expected = readExpectedFile("testMultipleFileCorruption.csv");
+    assertEquals(expected, inMemoryResult);
+
+    final String dbPath =
+        new FileSystemTestHelper().getTestRootDir() + "/corruption4.db";
+    final String dbResult = testCorruptionDetectorRun(4, removedIds, dbPath);
+    assertEquals(expected, dbResult);
+  }
+
+  /**
+   * Removes one directory inode and compares the detector output with
+   * testSingleFolderCorruption.csv, first with an empty metadata path and
+   * then with a path under the test root dir.
+   */
+  @Test
+  public void testCorruptionDetectionSingleFolderCorruption() throws Exception {
+    final List<Long> removedIds = Collections.singletonList(DIR_NODE_ID);
+    final String inMemoryResult = testCorruptionDetectorRun(5, removedIds, "");
+    final String expected = readExpectedFile("testSingleFolderCorruption.csv");
+    assertEquals(expected, inMemoryResult);
+
+    final String dbPath =
+        new FileSystemTestHelper().getTestRootDir() + "/corruption6.db";
+    final String dbResult = testCorruptionDetectorRun(6, removedIds, dbPath);
+    assertEquals(expected, dbResult);
+  }
+
+  /**
+   * Removes three file inodes and one directory inode and compares the
+   * detector output with testMultipleCorruption.csv, first with an empty
+   * metadata path and then with a path under the test root dir.
+   */
+  @Test
+  public void testCorruptionDetectionMultipleCorruption() throws Exception {
+    final List<Long> removedIds = Arrays.asList(FILE_NODE_ID_1, FILE_NODE_ID_2,
+        FILE_NODE_ID_3, DIR_NODE_ID);
+    final String inMemoryResult = testCorruptionDetectorRun(7, removedIds, "");
+    final String expected = readExpectedFile("testMultipleCorruption.csv");
+    assertEquals(expected, inMemoryResult);
+
+    final String dbPath =
+        new FileSystemTestHelper().getTestRootDir() + "/corruption8.db";
+    final String dbResult = testCorruptionDetectorRun(8, removedIds, dbPath);
+    assertEquals(expected, dbResult);
+  }
+
  private static void compareFile(FileStatus expected, FileStatus status) {
    assertEquals(expected.getAccessTime(), status.getAccessTime());
    assertEquals(expected.getBlockSize(), status.getBlockSize());
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestPBImageCorruption.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestPBImageCorruption.java
@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for PBImageCorruption: the corruption type name it reports
+ * and the number of corrupt children it stores.
+ */
+public class TestPBImageCorruption {
+  /** A corrupt node that also gets a missing child reports both types. */
+  @Test
+  public void testProperCorruptionTypeCreation() {
+    final PBImageCorruption corruption =
+        new PBImageCorruption(209, false, true, 1);
+    assertEquals("CorruptNode", corruption.getType());
+    corruption.addMissingChildCorruption();
+    assertEquals("CorruptNodeWithMissingChild", corruption.getType());
+  }
+
+  /** Creating a corruption with both flags set to false must be rejected. */
+  @Test(expected = IllegalArgumentException.class)
+  public void testImproperCorruptionTypeCreation() {
+    // Only the constructor call matters, the instance itself is not needed.
+    new PBImageCorruption(210, false, false, 2);
+  }
+
+  /** Checks the getters before and after the corruption is modified. */
+  @Test
+  public void testCorruptionClass() {
+    final PBImageCorruption corruption =
+        new PBImageCorruption(211, true, false, 3);
+    assertEquals(211, corruption.getId());
+    assertEquals("MissingChild", corruption.getType());
+    assertEquals(3, corruption.getNumOfCorruptChildren());
+
+    corruption.addCorruptNodeCorruption();
+    corruption.setNumberOfCorruption(34);
+    assertEquals("CorruptNodeWithMissingChild", corruption.getType());
+    assertEquals(34, corruption.getNumOfCorruptChildren());
+  }
+}
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testMultipleCorruption.csv
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testMultipleCorruption.csv
@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CorruptionType,Id,IsSnapshot,ParentPath,ParentId,Name,NodeType,CorruptChildren
+MissingChild,16385,false,/,Missing,,Node,1
+MissingChild,16386,false,/,16385,dir0,Node,2
+CorruptNode,16388,true,,16386,,Unknown,0
+CorruptNode,16389,true,,16386,,Unknown,0
+CorruptNodeWithMissingChild,16391,true,,16385,,Unknown,1
+CorruptNode,16394,true,,16391,,Unknown,0
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testMultipleFileCorruption.csv
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testMultipleFileCorruption.csv
@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CorruptionType,Id,IsSnapshot,ParentPath,ParentId,Name,NodeType,CorruptChildren
+MissingChild,16386,false,/,16385,dir0,Node,2
+MissingChild,16391,false,/,16385,dir1,Node,1
+CorruptNode,16388,true,,16386,,Unknown,0
+CorruptNode,16389,true,,16386,,Unknown,0
+CorruptNode,16394,true,,16391,,Unknown,0
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testSingleFileCorruption.csv
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testSingleFileCorruption.csv
@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CorruptionType,Id,IsSnapshot,ParentPath,ParentId,Name,NodeType,CorruptChildren
+MissingChild,16386,false,/,16385,dir0,Node,1
+CorruptNode,16388,true,,16386,,Unknown,0
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testSingleFolderCorruption.csv
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testSingleFolderCorruption.csv
@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CorruptionType,Id,IsSnapshot,ParentPath,ParentId,Name,NodeType,CorruptChildren
+MissingChild,16385,false,/,Missing,,Node,1
+CorruptNode,16391,true,,16385,,Unknown,0