From 293c763e82586b4f3230771a86734aae593f468d Mon Sep 17 00:00:00 2001
From: cnauroth
Date: Sat, 11 Apr 2015 13:23:18 -0700
Subject: [PATCH] HDFS-7933. fsck should also report decommissioning replicas.
 Contributed by Xiaoyu Yao.

(cherry picked from commit f8f5887209a7d8e53c0a77abef275cbcaf1f7a5b)
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 +
 .../server/blockmanagement/BlockManager.java  | 39 +++++----
 .../blockmanagement/DecommissionManager.java  |  7 +-
 .../blockmanagement/NumberReplicas.java       | 57 +++++++++++--
 .../hdfs/server/namenode/NamenodeFsck.java    | 40 ++++++---
 .../hadoop/hdfs/TestClientReportBadBlock.java |  2 +-
 .../datanode/TestReadOnlySharedStorage.java   |  2 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java | 81 +++++++++++++++++++
 8 files changed, 192 insertions(+), 39 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 2fcf0a756da..695b67db1c5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -107,6 +107,9 @@ Release 2.8.0 - UNRELEASED
     HdfsClientConfigKeys.Failover and fix typos in the dfs.http.client.*
     configuration keys. (szetszwo)
 
+    HDFS-7933. fsck should also report decommissioning replicas.
+    (Xiaoyu Yao via cnauroth)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index d06d482e92a..f9097b846a8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -539,7 +539,7 @@ public class BlockManager {
     // not included in the numReplicas.liveReplicas() count
     assert containingLiveReplicasNodes.size() >= numReplicas.liveReplicas();
     int usableReplicas = numReplicas.liveReplicas() +
-        numReplicas.decommissionedReplicas();
+        numReplicas.decommissionedAndDecommissioning();
     if (block instanceof BlockInfoContiguous) {
       BlockCollection bc = ((BlockInfoContiguous) block).getBlockCollection();
@@ -550,7 +550,7 @@ public class BlockManager {
     out.print(block + ((usableReplicas > 0)? "" : " MISSING") +
         " (replicas:" +
         " l: " + numReplicas.liveReplicas() +
-        " d: " + numReplicas.decommissionedReplicas() +
+        " d: " + numReplicas.decommissionedAndDecommissioning() +
         " c: " + numReplicas.corruptReplicas() +
         " e: " + numReplicas.excessReplicas() + ") ");
@@ -730,7 +730,7 @@ public class BlockManager {
     // Remove block from replication queue.
     NumberReplicas replicas = countNodes(ucBlock);
     neededReplications.remove(ucBlock, replicas.liveReplicas(),
-        replicas.decommissionedReplicas(), getReplication(ucBlock));
+        replicas.decommissionedAndDecommissioning(), getReplication(ucBlock));
     pendingReplications.remove(ucBlock);
     // remove this block from the list of pending blocks to be deleted.
@@ -1617,6 +1617,7 @@ public class BlockManager {
     DatanodeDescriptor srcNode = null;
     int live = 0;
     int decommissioned = 0;
+    int decommissioning = 0;
     int corrupt = 0;
     int excess = 0;
@@ -1628,9 +1629,11 @@ public class BlockManager {
       int countableReplica = storage.getState() == State.NORMAL ? 1 : 0;
       if ((nodesCorrupt != null) && (nodesCorrupt.contains(node)))
         corrupt += countableReplica;
-      else if (node.isDecommissionInProgress() || node.isDecommissioned())
+      else if (node.isDecommissionInProgress()) {
+        decommissioning += countableReplica;
+      } else if (node.isDecommissioned()) {
         decommissioned += countableReplica;
-      else if (excessBlocks != null && excessBlocks.contains(block)) {
+      } else if (excessBlocks != null && excessBlocks.contains(block)) {
         excess += countableReplica;
       } else {
         nodesContainingLiveReplicas.add(storage);
@@ -1670,7 +1673,8 @@ public class BlockManager {
       srcNode = node;
     }
     if(numReplicas != null)
-      numReplicas.initialize(live, decommissioned, corrupt, excess, 0);
+      numReplicas.initialize(live, decommissioned, decommissioning, corrupt,
+          excess, 0);
     return srcNode;
   }
@@ -1689,7 +1693,7 @@ public class BlockManager {
                            num.liveReplicas())) {
             neededReplications.add(timedOutItems[i],
                                    num.liveReplicas(),
-                                   num.decommissionedReplicas(),
+                                   num.decommissionedAndDecommissioning(),
                                    getReplication(timedOutItems[i]));
           }
         }
@@ -2576,7 +2580,7 @@ public class BlockManager {
     short fileReplication = bc.getBlockReplication();
     if (!isNeededReplication(storedBlock, fileReplication, numCurrentReplica)) {
       neededReplications.remove(storedBlock, numCurrentReplica,
-          num.decommissionedReplicas(), fileReplication);
+          num.decommissionedAndDecommissioning(), fileReplication);
     } else {
       updateNeededReplications(storedBlock, curReplicaDelta, 0);
     }
@@ -2810,7 +2814,7 @@ public class BlockManager {
     // add to under-replicated queue if need to be
     if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
       if (neededReplications.add(block, numCurrentReplica, num
-          .decommissionedReplicas(), expectedReplication)) {
+          .decommissionedAndDecommissioning(), expectedReplication)) {
         return MisReplicationResult.UNDER_REPLICATED;
       }
     }
@@ -3224,6 +3228,7 @@ public class BlockManager {
    */
   public NumberReplicas countNodes(Block b) {
     int decommissioned = 0;
+    int decommissioning = 0;
     int live = 0;
     int corrupt = 0;
     int excess = 0;
@@ -3233,7 +3238,9 @@ public class BlockManager {
       final DatanodeDescriptor node = storage.getDatanodeDescriptor();
       if ((nodesCorrupt != null) && (nodesCorrupt.contains(node))) {
         corrupt++;
-      } else if (node.isDecommissionInProgress() || node.isDecommissioned()) {
+      } else if (node.isDecommissionInProgress()) {
+        decommissioning++;
+      } else if (node.isDecommissioned()) {
         decommissioned++;
       } else {
         LightWeightLinkedSet<Block> blocksExcess = excessReplicateMap.get(node
@@ -3248,7 +3255,7 @@ public class BlockManager {
         stale++;
       }
     }
-    return new NumberReplicas(live, decommissioned, corrupt, excess, stale);
+    return new NumberReplicas(live, decommissioned, decommissioning, corrupt, excess, stale);
   }
   /**
@@ -3385,13 +3392,13 @@ public class BlockManager {
       int curExpectedReplicas = getReplication(block);
       if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) {
         neededReplications.update(block, repl.liveReplicas(), repl
-            .decommissionedReplicas(), curExpectedReplicas, curReplicasDelta,
-            expectedReplicasDelta);
+            .decommissionedAndDecommissioning(), curExpectedReplicas,
+            curReplicasDelta, expectedReplicasDelta);
       } else {
         int oldReplicas = repl.liveReplicas()-curReplicasDelta;
         int oldExpectedReplicas = curExpectedReplicas-expectedReplicasDelta;
-        neededReplications.remove(block, oldReplicas, repl.decommissionedReplicas(),
-            oldExpectedReplicas);
+        neededReplications.remove(block, oldReplicas,
+            repl.decommissionedAndDecommissioning(), oldExpectedReplicas);
       }
     } finally {
       namesystem.writeUnlock();
@@ -3410,7 +3417,7 @@ public class BlockManager {
       final NumberReplicas n = countNodes(block);
       if (isNeededReplication(block, expected, n.liveReplicas())) {
         neededReplications.add(block, n.liveReplicas(),
-            n.decommissionedReplicas(), expected);
+            n.decommissionedAndDecommissioning(), expected);
       } else if (n.liveReplicas() > expected) {
         processOverReplicatedBlock(block, expected, null, null);
       }
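
The BlockManager changes above split the old combined decommissioned count into separate decommissioned and decommissioning buckets and route existing callers through the new combined accessor. A minimal standalone sketch of that bucketing, using hypothetical stand-in types rather than HDFS classes:

// Illustrative sketch only; DatanodeState and ReplicaCounts are hypothetical
// stand-ins for the per-storage classification in BlockManager#countNodes,
// not HDFS types.
enum DatanodeState { NORMAL, DECOMMISSION_IN_PROGRESS, DECOMMISSIONED, CORRUPT }

class ReplicaCounts {
  int live, decommissioned, decommissioning, corrupt;

  void add(DatanodeState state) {
    switch (state) {
      case CORRUPT:                  corrupt++;         break;
      case DECOMMISSION_IN_PROGRESS: decommissioning++; break; // new separate bucket
      case DECOMMISSIONED:           decommissioned++;  break;
      default:                       live++;            break;
    }
  }

  // Mirrors NumberReplicas#decommissionedAndDecommissioning()
  int decommissionedAndDecommissioning() {
    return decommissioned + decommissioning;
  }
}

Keeping decommissionedAndDecommissioning() means call sites that previously consumed decommissionedReplicas() retain the same combined semantics, while fsck can now read the two counts individually.
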
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
index 7f3d77802ec..5c9aec76adc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
@@ -298,7 +298,8 @@ public class DecommissionManager {
     LOG.info("Block: " + block + ", Expected Replicas: "
         + curExpectedReplicas + ", live replicas: " + curReplicas
         + ", corrupt replicas: " + num.corruptReplicas()
-        + ", decommissioned replicas: " + num.decommissionedReplicas()
+        + ", decommissioned replicas: " + num.decommissioned()
+        + ", decommissioning replicas: " + num.decommissioning()
         + ", excess replicas: " + num.excessReplicas()
         + ", Is Open File: " + bc.isUnderConstruction()
         + ", Datanodes having this block: " + nodeList + ", Current Datanode: "
@@ -571,7 +572,7 @@ public class DecommissionManager {
           // Process these blocks only when active NN is out of safe mode.
           blockManager.neededReplications.add(block,
               curReplicas,
-              num.decommissionedReplicas(),
+              num.decommissionedAndDecommissioning(),
               bc.getBlockReplication());
         }
       }
@@ -600,7 +601,7 @@ public class DecommissionManager {
        if (bc.isUnderConstruction()) {
          underReplicatedInOpenFiles++;
        }
-        if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
+        if ((curReplicas == 0) && (num.decommissionedAndDecommissioning() > 0)) {
          decommissionOnlyReplicas++;
        }
      }
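
The DecommissionManager change is mostly a reporting tweak, but the last hunk keeps one behavioral detail: a block still counts as having decommission-only replicas when every remaining copy sits on a decommissioned or decommissioning node. A tiny illustrative check with plain ints, not DecommissionManager code:

// Illustrative only: mirrors the decommissionOnlyReplicas condition above.
int curReplicas = 0;               // live replicas of the block
int decomOrDecommissioning = 1;    // num.decommissionedAndDecommissioning()
boolean decommissionOnlyReplica = (curReplicas == 0) && (decomOrDecommissioning > 0);
// true here: the block's only copies are on nodes leaving the cluster.
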
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java
index 9e5c8dfd5ec..e567bbf3a2f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java
@@ -19,26 +19,33 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
 /**
  * A immutable object that stores the number of live replicas and
- * the number of decommissined Replicas.
+ * the number of decommissioned Replicas.
  */
 public class NumberReplicas {
   private int liveReplicas;
-  private int decommissionedReplicas;
+
+  // Tracks only the decommissioning replicas
+  private int decommissioning;
+  // Tracks only the decommissioned replicas
+  private int decommissioned;
   private int corruptReplicas;
   private int excessReplicas;
   private int replicasOnStaleNodes;
   NumberReplicas() {
-    initialize(0, 0, 0, 0, 0);
+    initialize(0, 0, 0, 0, 0, 0);
   }
-  NumberReplicas(int live, int decommissioned, int corrupt, int excess, int stale) {
-    initialize(live, decommissioned, corrupt, excess, stale);
+  NumberReplicas(int live, int decommissioned, int decommissioning, int corrupt,
+                 int excess, int stale) {
+    initialize(live, decommissioned, decommissioning, corrupt, excess, stale);
   }
-  void initialize(int live, int decommissioned, int corrupt, int excess, int stale) {
+  void initialize(int live, int decommissioned, int decommissioning,
+                  int corrupt, int excess, int stale) {
     liveReplicas = live;
-    decommissionedReplicas = decommissioned;
+    this.decommissioning = decommissioning;
+    this.decommissioned = decommissioned;
     corruptReplicas = corrupt;
     excessReplicas = excess;
     replicasOnStaleNodes = stale;
@@ -47,12 +54,46 @@ public class NumberReplicas {
   public int liveReplicas() {
     return liveReplicas;
   }
+
+  /**
+   *
+   * @return decommissioned replicas + decommissioning replicas
+   * It is deprecated by decommissionedAndDecommissioning
+   * due to its misleading name.
+   */
+  @Deprecated
   public int decommissionedReplicas() {
-    return decommissionedReplicas;
+    return decommissionedAndDecommissioning();
   }
+
+  /**
+   *
+   * @return decommissioned and decommissioning replicas
+   */
+  public int decommissionedAndDecommissioning() {
+    return decommissioned + decommissioning;
+  }
+
+  /**
+   *
+   * @return decommissioned replicas only
+   */
+  public int decommissioned() {
+    return decommissioned;
+  }
+
+  /**
+   *
+   * @return decommissioning replicas only
+   */
+  public int decommissioning() {
+    return decommissioning;
+  }
+
   public int corruptReplicas() {
     return corruptReplicas;
   }
+
   public int excessReplicas() {
     return excessReplicas;
   }
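
For callers, the NumberReplicas change amounts to three accessors: decommissioned() and decommissioning() report the two states separately, and decommissionedAndDecommissioning() preserves the old combined meaning behind the now-deprecated decommissionedReplicas(). A short usage sketch with made-up counts; it would have to live in the org.apache.hadoop.hdfs.server.blockmanagement package because the constructor is package-private:

// Illustrative only: 2 live, 1 decommissioned, 1 decommissioning, 0 corrupt/excess/stale.
NumberReplicas num = new NumberReplicas(2, 1, 1, 0, 0, 0);
assert num.decommissioned() == 1;                   // decommissioned only
assert num.decommissioning() == 1;                  // decommissioning only
assert num.decommissionedAndDecommissioning() == 2; // combined count, the new name
assert num.decommissionedReplicas() == 2;           // deprecated alias for the same sum
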
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index cc2c984baad..a1ad4d3b10a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -248,8 +248,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       out.println("No. of live Replica: " + numberReplicas.liveReplicas());
       out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
       out.println("No. of stale Replica: " + numberReplicas.replicasOnStaleNodes());
-      out.println("No. of decommission Replica: "
-          + numberReplicas.decommissionedReplicas());
+      out.println("No. of decommissioned Replica: "
+          + numberReplicas.decommissioned());
+      out.println("No. of decommissioning Replica: "
+          + numberReplicas.decommissioning());
       out.println("No. of corrupted Replica: " + numberReplicas.corruptReplicas());
       //record datanodes that have corrupted block replica
       Collection<DatanodeDescriptor> corruptionRecord = null;
@@ -504,10 +506,16 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       NumberReplicas numberReplicas = namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
       int liveReplicas = numberReplicas.liveReplicas();
-      res.totalReplicas += liveReplicas;
+      int decommissionedReplicas = numberReplicas.decommissioned();;
+      int decommissioningReplicas = numberReplicas.decommissioning();
+      res.decommissionedReplicas += decommissionedReplicas;
+      res.decommissioningReplicas += decommissioningReplicas;
+      int totalReplicas = liveReplicas + decommissionedReplicas +
+          decommissioningReplicas;
+      res.totalReplicas += totalReplicas;
       short targetFileReplication = file.getReplication();
       res.numExpectedReplicas += targetFileReplication;
-      if(liveReplicas < minReplication){
+      if(totalReplicas < minReplication){
        res.numUnderMinReplicatedBlocks++;
       }
-      if (liveReplicas > targetFileReplication) {
+      if (totalReplicas > targetFileReplication) {
@@ -527,10 +535,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() +
             " block " + block.getBlockName()+"\n");
       }
-      if (liveReplicas >= minReplication)
+      if (totalReplicas >= minReplication)
         res.numMinReplicatedBlocks++;
-      if (liveReplicas < targetFileReplication && liveReplicas > 0) {
-        res.missingReplicas += (targetFileReplication - liveReplicas);
+      if (totalReplicas < targetFileReplication && totalReplicas > 0) {
+        res.missingReplicas += (targetFileReplication - totalReplicas);
         res.numUnderReplicatedBlocks += 1;
         underReplicatedPerFile++;
         if (!showFiles) {
@@ -539,7 +547,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
           out.println(" Under replicated " + block +
              ". Target Replicas is " +
              targetFileReplication + " but found " +
-              liveReplicas + " replica(s).");
+              liveReplicas + " live replica(s), " +
+              decommissionedReplicas + " decommissioned replica(s) and " +
+              decommissioningReplicas + " decommissioning replica(s).");
         }
       // verify block placement policy
       BlockPlacementStatus blockPlacementStatus = bpPolicy
@@ -556,7 +566,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
             block + ". " + blockPlacementStatus.getErrorDescription());
       }
       report.append(i + ". " + blkName + " len=" + block.getNumBytes());
-      if (liveReplicas == 0) {
+      if (totalReplicas == 0) {
         report.append(" MISSING!");
         res.addMissing(block.toString(), block.getNumBytes());
         missing++;
@@ -856,6 +866,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
     long corruptBlocks = 0L;
     long excessiveReplicas = 0L;
     long missingReplicas = 0L;
+    long decommissionedReplicas = 0L;
+    long decommissioningReplicas = 0L;
     long numUnderMinReplicatedBlocks=0L;
     long numOverReplicatedBlocks = 0L;
     long numUnderReplicatedBlocks = 0L;
@@ -927,7 +939,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         res.append(" (Total open file blocks (not validated): ").append(
             totalOpenFilesBlocks).append(")");
       }
-      if (corruptFiles > 0 || numUnderMinReplicatedBlocks>0) {
+      if (corruptFiles > 0 || numUnderMinReplicatedBlocks > 0) {
         res.append("\n  ********************************");
         if(numUnderMinReplicatedBlocks>0){
           res.append("\n  UNDER MIN REPL'D BLOCKS:\t").append(numUnderMinReplicatedBlocks);
@@ -990,6 +1002,14 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
             ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append(
             " %)");
       }
+      if (decommissionedReplicas > 0) {
+        res.append("\n DecommissionedReplicas:\t").append(
+            decommissionedReplicas);
+      }
+      if (decommissioningReplicas > 0) {
+        res.append("\n DecommissioningReplicas:\t").append(
+            decommissioningReplicas);
+      }
       return res.toString();
     }
   }
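
The fsck accounting above boils down to sizing each block by live plus decommissioned plus decommissioning replicas, so a block whose only copies are on nodes being drained is no longer reported as missing, while the report still breaks the total down by state. A small sketch of that arithmetic with plain ints, not NamenodeFsck code:

// Illustrative only: mirrors the per-block arithmetic in NamenodeFsck#check
// after this patch; no HDFS types involved.
int liveReplicas = 0;
int decommissionedReplicas = 0;
int decommissioningReplicas = 1;      // single replica, on a draining datanode
short targetFileReplication = 1;

int totalReplicas = liveReplicas + decommissionedReplicas + decommissioningReplicas;

boolean missing = (totalReplicas == 0);                        // false: not flagged MISSING
boolean underReplicated =
    totalReplicas < targetFileReplication && totalReplicas > 0; // false: 1 >= 1
int missingReplicas = underReplicated ? targetFileReplication - totalReplicas : 0;
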
" + blkName + " len=" + block.getNumBytes()); - if (liveReplicas == 0) { + if (totalReplicas == 0) { report.append(" MISSING!"); res.addMissing(block.toString(), block.getNumBytes()); missing++; @@ -856,6 +866,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { long corruptBlocks = 0L; long excessiveReplicas = 0L; long missingReplicas = 0L; + long decommissionedReplicas = 0L; + long decommissioningReplicas = 0L; long numUnderMinReplicatedBlocks=0L; long numOverReplicatedBlocks = 0L; long numUnderReplicatedBlocks = 0L; @@ -927,7 +939,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { res.append(" (Total open file blocks (not validated): ").append( totalOpenFilesBlocks).append(")"); } - if (corruptFiles > 0 || numUnderMinReplicatedBlocks>0) { + if (corruptFiles > 0 || numUnderMinReplicatedBlocks > 0) { res.append("\n ********************************"); if(numUnderMinReplicatedBlocks>0){ res.append("\n UNDER MIN REPL'D BLOCKS:\t").append(numUnderMinReplicatedBlocks); @@ -990,6 +1002,14 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append( " %)"); } + if (decommissionedReplicas > 0) { + res.append("\n DecommissionedReplicas:\t").append( + decommissionedReplicas); + } + if (decommissioningReplicas > 0) { + res.append("\n DecommissioningReplicas:\t").append( + decommissioningReplicas); + } return res.toString(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java index 0c9660ede91..b3580b28700 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java @@ -192,7 +192,7 @@ public class TestClientReportBadBlock { verifyFirstBlockCorrupted(filePath, false); int expectedReplicaCount = repl-corruptBlocReplicas; verifyCorruptedBlockCount(filePath, expectedReplicaCount); - verifyFsckHealth("Target Replicas is 3 but found 1 replica"); + verifyFsckHealth("Target Replicas is 3 but found 1 live replica"); testFsckListCorruptFilesBlocks(filePath, 0); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java index e6bf0672d56..8f99afba1f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java @@ -192,7 +192,7 @@ public class TestReadOnlySharedStorage { assertThat(numberReplicas.liveReplicas(), is(expectedReplicas)); assertThat(numberReplicas.excessReplicas(), is(0)); assertThat(numberReplicas.corruptReplicas(), is(0)); - assertThat(numberReplicas.decommissionedReplicas(), is(0)); + assertThat(numberReplicas.decommissionedAndDecommissioning(), is(0)); assertThat(numberReplicas.replicasOnStaleNodes(), is(0)); BlockManagerTestUtil.updateState(blockManager); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index d53c896d9c0..eee2e95aa62 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -1464,4 +1464,85 @@ public class TestFsck {
       }
     }
   }
+
+  /**
+   * Test for blocks on decommissioning hosts are not shown as missing
+   */
+  @Test
+  public void testFsckWithDecommissionedReplicas() throws Exception {
+
+    final short REPL_FACTOR = 1;
+    short NUM_DN = 2;
+    final long blockSize = 512;
+    final long fileSize = 1024;
+    boolean checkDecommissionInProgress = false;
+    String [] racks = {"/rack1", "/rack2"};
+    String [] hosts = {"host1", "host2"};
+
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
+
+    MiniDFSCluster cluster;
+    DistributedFileSystem dfs ;
+    cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
+            .racks(racks).build();
+
+    assertNotNull("Failed Cluster Creation", cluster);
+    cluster.waitClusterUp();
+    dfs = cluster.getFileSystem();
+    assertNotNull("Failed to get FileSystem", dfs);
+
+    DFSTestUtil util = new DFSTestUtil.Builder().
+        setName(getClass().getSimpleName()).setNumFiles(1).build();
+
+    //create files
+    final String testFile = new String("/testfile");
+    final Path path = new Path(testFile);
+    util.createFile(dfs, path, fileSize, REPL_FACTOR, 1000L);
+    util.waitReplication(dfs, path, REPL_FACTOR);
+    try {
+      // make sure datanode that has replica is fine before decommission
+      String outStr = runFsck(conf, 0, true, testFile);
+      System.out.println(outStr);
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+      // decommission datanode
+      ExtendedBlock eb = util.getFirstBlock(dfs, path);
+      DatanodeDescriptor dn = cluster.getNameNode().getNamesystem()
+          .getBlockManager().getBlockCollection(eb.getLocalBlock())
+          .getBlocks()[0].getDatanode(0);
+      cluster.getNameNode().getNamesystem().getBlockManager()
+          .getDatanodeManager().getDecomManager().startDecommission(dn);
+      String dnName = dn.getXferAddr();
+
+      // wait for decommission start
+      DatanodeInfo datanodeInfo = null;
+      int count = 0;
+      do {
+        Thread.sleep(2000);
+        for (DatanodeInfo info : dfs.getDataNodeStats()) {
+          if (dnName.equals(info.getXferAddr())) {
+            datanodeInfo = info;
+          }
+        }
+        // check the replica status should be healthy(0)
+        // instead of corruption (1) during decommissioning
+        if(!checkDecommissionInProgress && datanodeInfo != null
+            && datanodeInfo.isDecommissionInProgress()) {
+          String fsckOut = runFsck(conf, 0, true, testFile);
+          checkDecommissionInProgress = true;
+        }
+      } while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
+
+      // check the replica status should be healthy(0) after decommission
+      // is done
+      String fsckOut = runFsck(conf, 0, true, testFile);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
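
The new test exercises exactly the target scenario: a single replica whose datanode is decommissioning, with fsck expected to stay healthy both during and after the decommission. A hedged sketch of the kind of assertion a caller could add on the new summary fields, reusing the same TestFsck helpers as above; the "DecommissioningReplicas" string comes from the NamenodeFsck appends in this patch, and whether it appears depends on the counts at the moment fsck runs:

// Illustrative only: assumes the same conf/testFile/runFsck setup as the test
// above and that the block still has a decommissioning replica when fsck runs.
String fsckOut = runFsck(conf, 0, true, testFile);
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
assertTrue(fsckOut.contains("DecommissioningReplicas"));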