From 040a1b7b90d10ffab9d2fe6e68935c045c72baf8 Mon Sep 17 00:00:00 2001 From: Zhe Zhang Date: Tue, 16 Aug 2016 10:35:52 -0700 Subject: [PATCH] HDFS-7933. fsck should also report decommissioning replicas. Contributed by Xiaoyu Yao. (cherry picked from commit 293c763e82586b4f3230771a86734aae593f468d) --- .../server/blockmanagement/BlockManager.java | 39 +++++---- .../blockmanagement/DecommissionManager.java | 7 +- .../blockmanagement/NumberReplicas.java | 57 +++++++++++-- .../hdfs/server/namenode/NamenodeFsck.java | 40 ++++++--- .../hadoop/hdfs/TestClientReportBadBlock.java | 2 +- .../datanode/TestReadOnlySharedStorage.java | 2 +- .../hadoop/hdfs/server/namenode/TestFsck.java | 81 +++++++++++++++++++ 7 files changed, 189 insertions(+), 39 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 8f37bc60f36..7f54476b2ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -543,7 +543,7 @@ public class BlockManager { // not included in the numReplicas.liveReplicas() count assert containingLiveReplicasNodes.size() >= numReplicas.liveReplicas(); int usableReplicas = numReplicas.liveReplicas() + - numReplicas.decommissionedReplicas(); + numReplicas.decommissionedAndDecommissioning(); if (block instanceof BlockInfoContiguous) { BlockCollection bc = ((BlockInfoContiguous) block).getBlockCollection(); @@ -554,7 +554,7 @@ public class BlockManager { out.print(block + ((usableReplicas > 0)? 
"" : " MISSING") + " (replicas:" + " l: " + numReplicas.liveReplicas() + - " d: " + numReplicas.decommissionedReplicas() + + " d: " + numReplicas.decommissionedAndDecommissioning() + " c: " + numReplicas.corruptReplicas() + " e: " + numReplicas.excessReplicas() + ") "); @@ -738,7 +738,7 @@ public class BlockManager { // Remove block from replication queue. NumberReplicas replicas = countNodes(ucBlock); neededReplications.remove(ucBlock, replicas.liveReplicas(), - replicas.decommissionedReplicas(), getReplication(ucBlock)); + replicas.decommissionedAndDecommissioning(), getReplication(ucBlock)); pendingReplications.remove(ucBlock); // remove this block from the list of pending blocks to be deleted. @@ -1637,6 +1637,7 @@ public class BlockManager { DatanodeDescriptor srcNode = null; int live = 0; int decommissioned = 0; + int decommissioning = 0; int corrupt = 0; int excess = 0; @@ -1648,9 +1649,11 @@ public class BlockManager { int countableReplica = storage.getState() == State.NORMAL ? 1 : 0; if ((nodesCorrupt != null) && (nodesCorrupt.contains(node))) corrupt += countableReplica; - else if (node.isDecommissionInProgress() || node.isDecommissioned()) + else if (node.isDecommissionInProgress()) { + decommissioning += countableReplica; + } else if (node.isDecommissioned()) { decommissioned += countableReplica; - else if (excessBlocks != null && excessBlocks.contains(block)) { + } else if (excessBlocks != null && excessBlocks.contains(block)) { excess += countableReplica; } else { nodesContainingLiveReplicas.add(storage); @@ -1690,7 +1693,8 @@ public class BlockManager { srcNode = node; } if(numReplicas != null) - numReplicas.initialize(live, decommissioned, corrupt, excess, 0); + numReplicas.initialize(live, decommissioned, decommissioning, corrupt, + excess, 0); return srcNode; } @@ -1715,7 +1719,7 @@ public class BlockManager { NumberReplicas num = countNodes(timedOutItems[i]); if (isNeededReplication(bi, getReplication(bi), num.liveReplicas())) { 
neededReplications.add(bi, num.liveReplicas(), - num.decommissionedReplicas(), getReplication(bi)); + num.decommissionedAndDecommissioning(), getReplication(bi)); } } } finally { @@ -2619,7 +2623,7 @@ public class BlockManager { short fileReplication = bc.getBlockReplication(); if (!isNeededReplication(storedBlock, fileReplication, numCurrentReplica)) { neededReplications.remove(storedBlock, numCurrentReplica, - num.decommissionedReplicas(), fileReplication); + num.decommissionedAndDecommissioning(), fileReplication); } else { updateNeededReplications(storedBlock, curReplicaDelta, 0); } @@ -2853,7 +2857,7 @@ public class BlockManager { // add to under-replicated queue if need to be if (isNeededReplication(block, expectedReplication, numCurrentReplica)) { if (neededReplications.add(block, numCurrentReplica, num - .decommissionedReplicas(), expectedReplication)) { + .decommissionedAndDecommissioning(), expectedReplication)) { return MisReplicationResult.UNDER_REPLICATED; } } @@ -3225,6 +3229,7 @@ public class BlockManager { */ public NumberReplicas countNodes(Block b) { int decommissioned = 0; + int decommissioning = 0; int live = 0; int corrupt = 0; int excess = 0; @@ -3234,7 +3239,9 @@ public class BlockManager { final DatanodeDescriptor node = storage.getDatanodeDescriptor(); if ((nodesCorrupt != null) && (nodesCorrupt.contains(node))) { corrupt++; - } else if (node.isDecommissionInProgress() || node.isDecommissioned()) { + } else if (node.isDecommissionInProgress()) { + decommissioning++; + } else if (node.isDecommissioned()) { decommissioned++; } else { LightWeightLinkedSet blocksExcess = excessReplicateMap.get(node @@ -3249,7 +3256,7 @@ public class BlockManager { stale++; } } - return new NumberReplicas(live, decommissioned, corrupt, excess, stale); + return new NumberReplicas(live, decommissioned, decommissioning, corrupt, excess, stale); } /** @@ -3386,13 +3393,13 @@ public class BlockManager { int curExpectedReplicas = getReplication(block); if 
(isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) { neededReplications.update(block, repl.liveReplicas(), repl - .decommissionedReplicas(), curExpectedReplicas, curReplicasDelta, - expectedReplicasDelta); + .decommissionedAndDecommissioning(), curExpectedReplicas, + curReplicasDelta, expectedReplicasDelta); } else { int oldReplicas = repl.liveReplicas()-curReplicasDelta; int oldExpectedReplicas = curExpectedReplicas-expectedReplicasDelta; - neededReplications.remove(block, oldReplicas, repl.decommissionedReplicas(), - oldExpectedReplicas); + neededReplications.remove(block, oldReplicas, + repl.decommissionedAndDecommissioning(), oldExpectedReplicas); } } finally { namesystem.writeUnlock(); @@ -3411,7 +3418,7 @@ public class BlockManager { final NumberReplicas n = countNodes(block); if (isNeededReplication(block, expected, n.liveReplicas())) { neededReplications.add(block, n.liveReplicas(), - n.decommissionedReplicas(), expected); + n.decommissionedAndDecommissioning(), expected); } else if (n.liveReplicas() > expected) { processOverReplicatedBlock(block, expected, null, null); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java index c76b192c7e9..437d99a2c21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java @@ -299,7 +299,8 @@ public class DecommissionManager { LOG.info("Block: " + block + ", Expected Replicas: " + curExpectedReplicas + ", live replicas: " + curReplicas + ", corrupt replicas: " + num.corruptReplicas() - + ", decommissioned replicas: " + num.decommissionedReplicas() + + ", decommissioned replicas: " + num.decommissioned() + + ", 
decommissioning replicas: " + num.decommissioning() + ", excess replicas: " + num.excessReplicas() + ", Is Open File: " + bc.isUnderConstruction() + ", Datanodes having this block: " + nodeList + ", Current Datanode: " @@ -572,7 +573,7 @@ public class DecommissionManager { // Process these blocks only when active NN is out of safe mode. blockManager.neededReplications.add(block, curReplicas, - num.decommissionedReplicas(), + num.decommissionedAndDecommissioning(), bc.getBlockReplication()); } } @@ -601,7 +602,7 @@ public class DecommissionManager { if (bc.isUnderConstruction()) { underReplicatedInOpenFiles++; } - if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) { + if ((curReplicas == 0) && (num.decommissionedAndDecommissioning() > 0)) { decommissionOnlyReplicas++; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java index 9e5c8dfd5ec..e567bbf3a2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java @@ -19,26 +19,33 @@ package org.apache.hadoop.hdfs.server.blockmanagement; /** * A immutable object that stores the number of live replicas and - * the number of decommissined Replicas. + * the number of decommissioned Replicas. 
*/ public class NumberReplicas { private int liveReplicas; - private int decommissionedReplicas; + + // Tracks only the decommissioning replicas + private int decommissioning; + // Tracks only the decommissioned replicas + private int decommissioned; private int corruptReplicas; private int excessReplicas; private int replicasOnStaleNodes; NumberReplicas() { - initialize(0, 0, 0, 0, 0); + initialize(0, 0, 0, 0, 0, 0); } - NumberReplicas(int live, int decommissioned, int corrupt, int excess, int stale) { - initialize(live, decommissioned, corrupt, excess, stale); + NumberReplicas(int live, int decommissioned, int decommissioning, int corrupt, + int excess, int stale) { + initialize(live, decommissioned, decommissioning, corrupt, excess, stale); } - void initialize(int live, int decommissioned, int corrupt, int excess, int stale) { + void initialize(int live, int decommissioned, int decommissioning, + int corrupt, int excess, int stale) { liveReplicas = live; - decommissionedReplicas = decommissioned; + this.decommissioning = decommissioning; + this.decommissioned = decommissioned; corruptReplicas = corrupt; excessReplicas = excess; replicasOnStaleNodes = stale; @@ -47,12 +54,46 @@ public class NumberReplicas { public int liveReplicas() { return liveReplicas; } + + /** + * + * @return decommissioned replicas + decommissioning replicas + * It is deprecated by decommissionedAndDecommissioning + * due to its misleading name. 
+ */ + @Deprecated public int decommissionedReplicas() { - return decommissionedReplicas; + return decommissionedAndDecommissioning(); } + + /** + * + * @return decommissioned and decommissioning replicas + */ + public int decommissionedAndDecommissioning() { + return decommissioned + decommissioning; + } + + /** + * + * @return decommissioned replicas only + */ + public int decommissioned() { + return decommissioned; + } + + /** + * + * @return decommissioning replicas only + */ + public int decommissioning() { + return decommissioning; + } + public int corruptReplicas() { return corruptReplicas; } + public int excessReplicas() { return excessReplicas; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index f66c84ad83f..3ff3c19b90c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -245,8 +245,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { out.println("No. of live Replica: " + numberReplicas.liveReplicas()); out.println("No. of excess Replica: " + numberReplicas.excessReplicas()); out.println("No. of stale Replica: " + numberReplicas.replicasOnStaleNodes()); - out.println("No. of decommission Replica: " - + numberReplicas.decommissionedReplicas()); + out.println("No. of decommissioned Replica: " + + numberReplicas.decommissioned()); + out.println("No. of decommissioning Replica: " + + numberReplicas.decommissioning()); out.println("No. 
of corrupted Replica: " + numberReplicas.corruptReplicas()); //record datanodes that have corrupted block replica Collection corruptionRecord = null; @@ -502,10 +504,16 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { NumberReplicas numberReplicas = namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock()); int liveReplicas = numberReplicas.liveReplicas(); - res.totalReplicas += liveReplicas; + int decommissionedReplicas = numberReplicas.decommissioned();; + int decommissioningReplicas = numberReplicas.decommissioning(); + res.decommissionedReplicas += decommissionedReplicas; + res.decommissioningReplicas += decommissioningReplicas; + int totalReplicas = liveReplicas + decommissionedReplicas + + decommissioningReplicas; + res.totalReplicas += totalReplicas; short targetFileReplication = file.getReplication(); res.numExpectedReplicas += targetFileReplication; - if(liveReplicas < res.minReplication){ + if(totalReplicas < res.minReplication){ res.numUnderMinReplicatedBlocks++; } if (liveReplicas > targetFileReplication) { @@ -525,10 +533,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + " block " + block.getBlockName()+"\n"); } - if (liveReplicas >= res.minReplication) + if (totalReplicas >= res.minReplication) res.numMinReplicatedBlocks++; - if (liveReplicas < targetFileReplication && liveReplicas > 0) { - res.missingReplicas += (targetFileReplication - liveReplicas); + if (totalReplicas < targetFileReplication && totalReplicas > 0) { + res.missingReplicas += (targetFileReplication - totalReplicas); res.numUnderReplicatedBlocks += 1; underReplicatedPerFile++; if (!showFiles) { @@ -537,7 +545,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { out.println(" Under replicated " + block + ". 
Target Replicas is " + targetFileReplication + " but found " + - liveReplicas + " replica(s)."); + liveReplicas + " live replica(s), " + + decommissionedReplicas + " decommissioned replica(s) and " + + decommissioningReplicas + " decommissioning replica(s)."); } // count mis replicated blocks @@ -555,7 +565,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { block + ". " + blockPlacementStatus.getErrorDescription()); } report.append(i + ". " + blkName + " len=" + block.getNumBytes()); - if (liveReplicas == 0) { + if (totalReplicas == 0) { report.append(" MISSING!"); res.addMissing(block.toString(), block.getNumBytes()); missing++; @@ -855,6 +865,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { long corruptBlocks = 0L; long excessiveReplicas = 0L; long missingReplicas = 0L; + long decommissionedReplicas = 0L; + long decommissioningReplicas = 0L; long numUnderMinReplicatedBlocks=0L; long numOverReplicatedBlocks = 0L; long numUnderReplicatedBlocks = 0L; @@ -926,7 +938,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { res.append(" (Total open file blocks (not validated): ").append( totalOpenFilesBlocks).append(")"); } - if (corruptFiles > 0 || numUnderMinReplicatedBlocks>0) { + if (corruptFiles > 0 || numUnderMinReplicatedBlocks > 0) { res.append("\n ********************************"); if(numUnderMinReplicatedBlocks>0){ res.append("\n UNDER MIN REPL'D BLOCKS:\t").append(numUnderMinReplicatedBlocks); @@ -989,6 +1001,14 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append( " %)"); } + if (decommissionedReplicas > 0) { + res.append("\n DecommissionedReplicas:\t").append( + decommissionedReplicas); + } + if (decommissioningReplicas > 0) { + res.append("\n DecommissioningReplicas:\t").append( + decommissioningReplicas); + } return res.toString(); } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java index 3bc986fd8f2..669dcec21c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java @@ -191,7 +191,7 @@ public class TestClientReportBadBlock { verifyFirstBlockCorrupted(filePath, false); int expectedReplicaCount = repl-corruptBlocReplicas; verifyCorruptedBlockCount(filePath, expectedReplicaCount); - verifyFsckHealth("Target Replicas is 3 but found 1 replica"); + verifyFsckHealth("Target Replicas is 3 but found 1 live replica"); testFsckListCorruptFilesBlocks(filePath, 0); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java index e6bf0672d56..8f99afba1f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java @@ -192,7 +192,7 @@ public class TestReadOnlySharedStorage { assertThat(numberReplicas.liveReplicas(), is(expectedReplicas)); assertThat(numberReplicas.excessReplicas(), is(0)); assertThat(numberReplicas.corruptReplicas(), is(0)); - assertThat(numberReplicas.decommissionedReplicas(), is(0)); + assertThat(numberReplicas.decommissionedAndDecommissioning(), is(0)); assertThat(numberReplicas.replicasOnStaleNodes(), is(0)); BlockManagerTestUtil.updateState(blockManager); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index df577681d02..e572ef0c97c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -1467,4 +1467,85 @@ public class TestFsck { } } } + + /** + * Test that blocks on decommissioning hosts are not shown as missing + */ + @Test + public void testFsckWithDecommissionedReplicas() throws Exception { + + final short REPL_FACTOR = 1; + short NUM_DN = 2; + final long blockSize = 512; + final long fileSize = 1024; + boolean checkDecommissionInProgress = false; + String [] racks = {"/rack1", "/rack2"}; + String [] hosts = {"host1", "host2"}; + + Configuration conf = new Configuration(); + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1); + + MiniDFSCluster cluster; + DistributedFileSystem dfs ; + cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts) + .racks(racks).build(); + + assertNotNull("Failed Cluster Creation", cluster); + cluster.waitClusterUp(); + dfs = cluster.getFileSystem(); + assertNotNull("Failed to get FileSystem", dfs); + + DFSTestUtil util = new DFSTestUtil.Builder(). 
+ setName(getClass().getSimpleName()).setNumFiles(1).build(); + + //create files + final String testFile = new String("/testfile"); + final Path path = new Path(testFile); + util.createFile(dfs, path, fileSize, REPL_FACTOR, 1000L); + util.waitReplication(dfs, path, REPL_FACTOR); + try { + // make sure datanode that has replica is fine before decommission + String outStr = runFsck(conf, 0, true, testFile); + System.out.println(outStr); + assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); + + // decommission datanode + ExtendedBlock eb = util.getFirstBlock(dfs, path); + DatanodeDescriptor dn = cluster.getNameNode().getNamesystem() + .getBlockManager().getBlockCollection(eb.getLocalBlock()) + .getBlocks()[0].getDatanode(0); + cluster.getNameNode().getNamesystem().getBlockManager() + .getDatanodeManager().getDecomManager().startDecommission(dn); + String dnName = dn.getXferAddr(); + + // wait until the datanode is decommissioned + DatanodeInfo datanodeInfo = null; + int count = 0; + do { + Thread.sleep(2000); + for (DatanodeInfo info : dfs.getDataNodeStats()) { + if (dnName.equals(info.getXferAddr())) { + datanodeInfo = info; + } + } + // the replica status should be healthy (0), + // not corrupt (1), while decommissioning is in progress + if(!checkDecommissionInProgress && datanodeInfo != null + && datanodeInfo.isDecommissionInProgress()) { + String fsckOut = runFsck(conf, 0, true, testFile); + checkDecommissionInProgress = true; + } + } while (datanodeInfo != null && !datanodeInfo.isDecommissioned()); + + // the replica status should be healthy (0) after decommission + // is done + String fsckOut = runFsck(conf, 0, true, testFile); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } }