From f27a976a2f65cf758f259ccd3d393dd868c3eb1f Mon Sep 17 00:00:00 2001
From: Tsz-Wo Nicholas Sze
Date: Thu, 23 Apr 2015 14:19:33 -0700
Subject: [PATCH] HDFS-8215. Refactor NamenodeFsck#check method. Contributed by Takanobu Asanuma

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt |   3 +
 .../hdfs/server/namenode/NamenodeFsck.java  | 164 +++++++++++-------
 2 files changed, 107 insertions(+), 60 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index e0f13aa27a0..813ce78ac24 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -146,6 +146,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-4448. Allow HA NN to start in secure mode with wildcard address
     configured (atm via asuresh)
 
+    HDFS-8215. Refactor NamenodeFsck#check method. (Takanobu Asanuma
+    via szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 52fc1054e75..3bd3405beb3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -426,36 +426,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
   @VisibleForTesting
   void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     String path = file.getFullName(parent);
-    boolean isOpen = false;
-
     if (file.isDir()) {
-      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
-        String snapshotPath = (path.endsWith(Path.SEPARATOR) ? path : path
-            + Path.SEPARATOR)
-            + HdfsConstants.DOT_SNAPSHOT_DIR;
-        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(
-            snapshotPath);
-        check(snapshotPath, snapshotFileInfo, res);
-      }
-      byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
-      DirectoryListing thisListing;
-      if (showFiles) {
-        out.println(path + " <dir>");
-      }
-      res.totalDirs++;
-      do {
-        assert lastReturnedName != null;
-        thisListing = namenode.getRpcServer().getListing(
-            path, lastReturnedName, false);
-        if (thisListing == null) {
-          return;
-        }
-        HdfsFileStatus[] files = thisListing.getPartialListing();
-        for (int i = 0; i < files.length; i++) {
-          check(path, files[i], res);
-        }
-        lastReturnedName = thisListing.getLastName();
-      } while (thisListing.hasMore());
+      checkDir(path, res);
       return;
     }
     if (file.isSymlink()) {
@@ -465,9 +437,47 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       res.totalSymlinks++;
       return;
     }
+    LocatedBlocks blocks = getBlockLocations(path, file);
+    if (blocks == null) { // the file is deleted
+      return;
+    }
+    collectFileSummary(path, file, res, blocks);
+    collectBlocksSummary(parent, file, res, blocks);
+  }
+
+  private void checkDir(String path, Result res) throws IOException {
+    if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
+      String snapshotPath = (path.endsWith(Path.SEPARATOR) ? path : path
+          + Path.SEPARATOR)
+          + HdfsConstants.DOT_SNAPSHOT_DIR;
+      HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(
+          snapshotPath);
+      check(snapshotPath, snapshotFileInfo, res);
+    }
+    byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
+    DirectoryListing thisListing;
+    if (showFiles) {
+      out.println(path + " <dir>");
+    }
+    res.totalDirs++;
+    do {
+      assert lastReturnedName != null;
+      thisListing = namenode.getRpcServer().getListing(
+          path, lastReturnedName, false);
+      if (thisListing == null) {
+        return;
+      }
+      HdfsFileStatus[] files = thisListing.getPartialListing();
+      for (int i = 0; i < files.length; i++) {
+        check(path, files[i], res);
+      }
+      lastReturnedName = thisListing.getLastName();
+    } while (thisListing.hasMore());
+  }
+
+  private LocatedBlocks getBlockLocations(String path, HdfsFileStatus file)
+      throws IOException {
     long fileLen = file.getLen();
-    // Get block locations without updating the file access time
-    // and without block access tokens
     LocatedBlocks blocks = null;
     FSNamesystem fsn = namenode.getNamesystem();
     fsn.readLock();
@@ -478,10 +488,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
     } finally {
       fsn.readUnlock();
     }
-    if (blocks == null) { // the file is deleted
-      return;
-    }
-    isOpen = blocks.isUnderConstruction();
+    return blocks;
+  }
+
+  private void collectFileSummary(String path, HdfsFileStatus file, Result res,
+      LocatedBlocks blocks) throws IOException {
+    long fileLen = file.getLen();
+    boolean isOpen = blocks.isUnderConstruction();
     if (isOpen && !showOpenFiles) {
       // We collect these stats about open files to report with default options
       res.totalOpenFilesSize += fileLen;
@@ -502,57 +515,67 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         out.print('.');
       }
       if (res.totalFiles % 100 == 0) { out.println(); out.flush(); }
+  }
+
+  private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res,
+      LocatedBlocks blocks) throws IOException {
+    String path = file.getFullName(parent);
+    boolean isOpen = blocks.isUnderConstruction();
     int missing = 0;
     int corrupt = 0;
     long missize = 0;
     int underReplicatedPerFile = 0;
     int misReplicatedPerFile = 0;
     StringBuilder report = new StringBuilder();
-    int i = 0;
+    int blockNumber = 0;
     for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
       ExtendedBlock block = lBlk.getBlock();
-      boolean isCorrupt = lBlk.isCorrupt();
-      String blkName = block.toString();
       BlockManager bm = namenode.getNamesystem().getBlockManager();
+
+      // count decommissionedReplicas / decommissioningReplicas
       NumberReplicas numberReplicas = bm.countNodes(block.getLocalBlock());
-      int liveReplicas = numberReplicas.liveReplicas();
       int decommissionedReplicas = numberReplicas.decommissioned();;
       int decommissioningReplicas = numberReplicas.decommissioning();
       res.decommissionedReplicas += decommissionedReplicas;
       res.decommissioningReplicas += decommissioningReplicas;
-      int totalReplicas = liveReplicas + decommissionedReplicas +
+
+      // count total replicas
+      int liveReplicas = numberReplicas.liveReplicas();
+      int totalReplicasPerBlock = liveReplicas + decommissionedReplicas +
           decommissioningReplicas;
-      res.totalReplicas += totalReplicas;
-      Collection<DatanodeDescriptor> corruptReplicas = null;
-      if (showReplicaDetails) {
-        corruptReplicas = bm.getCorruptReplicas(block.getLocalBlock());
-      }
+      res.totalReplicas += totalReplicasPerBlock;
+
+      // count expected replicas
       short targetFileReplication = file.getReplication();
       res.numExpectedReplicas += targetFileReplication;
-      if(totalReplicas < minReplication){
+
+      // count under min repl'd blocks
+      if(totalReplicasPerBlock < minReplication){
         res.numUnderMinReplicatedBlocks++;
       }
+
+      // count excessive Replicas / over replicated blocks
       if (liveReplicas > targetFileReplication) {
         res.excessiveReplicas += (liveReplicas - targetFileReplication);
         res.numOverReplicatedBlocks += 1;
       }
-      //keep track of storage tier counts
-      if (this.showStoragePolcies && lBlk.getStorageTypes() != null) {
-        StorageType[] storageTypes = lBlk.getStorageTypes();
-        storageTypeSummary.add(Arrays.copyOf(storageTypes, storageTypes.length),
-            fsn.getBlockManager().getStoragePolicy(file.getStoragePolicy()));
-      }
-      // Check if block is Corrupt
+
+      // count corrupt blocks
+      boolean isCorrupt = lBlk.isCorrupt();
       if (isCorrupt) {
        corrupt++;
        res.corruptBlocks++;
        out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() +
            " block " + block.getBlockName()+"\n");
      }
-      if (totalReplicas >= minReplication)
+
+      // count minimally replicated blocks
+      if (totalReplicasPerBlock >= minReplication)
        res.numMinReplicatedBlocks++;
-      if (totalReplicas < targetFileReplication && totalReplicas > 0) {
-        res.missingReplicas += (targetFileReplication - totalReplicas);
+
+      // count missing replicas / under replicated blocks
+      if (totalReplicasPerBlock < targetFileReplication && totalReplicasPerBlock > 0) {
+        res.missingReplicas += (targetFileReplication - totalReplicasPerBlock);
         res.numUnderReplicatedBlocks += 1;
         underReplicatedPerFile++;
         if (!showFiles) {
@@ -565,7 +588,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
             decommissionedReplicas + " decommissioned replica(s) and " +
             decommissioningReplicas + " decommissioning replica(s).");
       }
-      // verify block placement policy
+
+      // count mis replicated blocks block
       BlockPlacementStatus blockPlacementStatus = bpPolicy
           .verifyBlockPlacement(path, lBlk, targetFileReplication);
       if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
@@ -579,8 +603,16 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         out.println(" Replica placement policy is violated for " + block +
             ". " + blockPlacementStatus.getErrorDescription());
       }
-      report.append(i + ". " + blkName + " len=" + block.getNumBytes());
-      if (totalReplicas == 0) {
+
+      // count storage summary
+      if (this.showStoragePolcies && lBlk.getStorageTypes() != null) {
+        countStorageTypeSummary(file, lBlk);
+      }
+
+      // report
+      String blkName = block.toString();
+      report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
+      if (totalReplicasPerBlock == 0) {
         report.append(" MISSING!");
         res.addMissing(block.toString(), block.getNumBytes());
         missing++;
@@ -602,6 +634,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         if (showReplicaDetails) {
           LightWeightLinkedSet<Block> blocksExcess =
               bm.excessReplicateMap.get(dnDesc.getDatanodeUuid());
+          Collection<DatanodeDescriptor> corruptReplicas =
+              bm.getCorruptReplicas(block.getLocalBlock());
           sb.append("(");
           if (dnDesc.isDecommissioned()) {
             sb.append("DECOMMISSIONED)");
@@ -628,8 +662,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         }
       }
       report.append('\n');
-      i++;
+      blockNumber++;
     }
+
+    // count corrupt file & move or delete if necessary
     if ((missing > 0) || (corrupt > 0)) {
       if (!showFiles && (missing > 0)) {
         out.print("\n" + path + ": MISSING " + missing
@@ -643,6 +679,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         if (doDelete) deleteCorruptedFile(path);
       }
     }
+
     if (showFiles) {
       if (missing > 0) {
         out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
@@ -655,6 +692,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
     }
   }
 
+  private void countStorageTypeSummary(HdfsFileStatus file, LocatedBlock lBlk) {
+    StorageType[] storageTypes = lBlk.getStorageTypes();
+    storageTypeSummary.add(Arrays.copyOf(storageTypes, storageTypes.length),
+        namenode.getNamesystem().getBlockManager()
+        .getStoragePolicy(file.getStoragePolicy()));
+  }
+
   private void deleteCorruptedFile(String path) {
     try {
       namenode.getRpcServer().delete(path, true);