diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0dae4ed5d38..c9a4feee52d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -434,6 +434,9 @@ Release 2.1.0-beta - 2013-07-02 HDFS-4944. WebHDFS cannot create a file path containing characters that must be URI-encoded, such as space. (cnauroth) + HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via + kihwal) + BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 34d33b5a01b..7e5a3298799 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4014,9 +4014,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // internal fields /** Time when threshold was reached. - * - *
-1 safe mode is off - *
0 safe mode is on, but threshold is not reached yet + *
-1 safe mode is off + *
0 safe mode is on, and threshold is not reached yet + *
>0 safe mode is on, but we are in extension period */ private long reached = -1; /** Total number of blocks. */ @@ -4140,7 +4140,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.info("STATE* Leaving safe mode after " + timeInSafemode/1000 + " secs"); NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode); - + + //Log the following only once (when transitioning from ON -> OFF) if (reached >= 0) { NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); } @@ -4321,62 +4322,56 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * A tip on how safe mode is to be turned off: manually or automatically. */ String getTurnOffTip() { - if(reached < 0) + if(!isOn()) return "Safe mode is OFF."; - String leaveMsg = ""; + + //Manual OR low-resource safemode. (Admin intervention required) + String leaveMsg = "It was turned on manually. "; if (areResourcesLow()) { - leaveMsg = "Resources are low on NN. " - + "Please add or free up more resources then turn off safe mode manually. " - + "NOTE: If you turn off safe mode before adding resources, " - + "the NN will immediately return to safe mode."; - } else { - leaveMsg = "Safe mode will be turned off automatically"; + leaveMsg = "Resources are low on NN. Please add or free up more " + + "resources then turn off safe mode manually. NOTE: If you turn off" + + " safe mode before adding resources, " + + "the NN will immediately return to safe mode. "; } - if(isManual() && !areResourcesLow()) { - leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off"; + if (isManual() || areResourcesLow()) { + return leaveMsg + + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; } - if(blockTotal < 0) - return leaveMsg + "."; - + //Automatic safemode. System will come out of safemode automatically. + leaveMsg = "Safe mode will be turned off automatically"; int numLive = getNumLiveDataNodes(); String msg = ""; if (reached == 0) { if (blockSafe < blockThreshold) { msg += String.format( "The reported blocks %d needs additional %d" - + " blocks to reach the threshold %.4f of total blocks %d.", + + " blocks to reach the threshold %.4f of total blocks %d.\n", blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); } if (numLive < datanodeThreshold) { - if (!"".equals(msg)) { - msg += "\n"; - } msg += String.format( "The number of live datanodes %d needs an additional %d live " - + "datanodes to reach the minimum number %d.", + + "datanodes to reach the minimum number %d.\n", numLive, (datanodeThreshold - numLive), datanodeThreshold); } - msg += " " + leaveMsg; } else { msg = String.format("The reported blocks %d has reached the threshold" - + " %.4f of total blocks %d.", blockSafe, threshold, - blockTotal); + + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); - if (datanodeThreshold > 0) { - msg += String.format(" The number of live datanodes %d has reached " - + "the minimum number %d.", + msg += String.format("The number of live datanodes %d has reached " + + "the minimum number %d. ", numLive, datanodeThreshold); - } - msg += " " + leaveMsg; } + msg += leaveMsg; // threshold is not reached or manual or resources low if(reached == 0 || (isManual() && !areResourcesLow())) { - return msg + "."; + return msg; } // extension period is in progress - return msg + " in " + Math.abs(reached + extension - now()) / 1000 - + " seconds."; + return msg + (reached + extension - now() > 0 ? + " in " + (reached + extension - now()) / 1000 + " seconds." + : " soon."); } /** @@ -5631,7 +5626,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public String getSafemode() { if (!this.isInSafeMode()) return ""; - return "Safe mode is ON." + this.getSafeModeTip(); + return "Safe mode is ON. " + this.getSafeModeTip(); } @Override // NameNodeMXBean diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index 794b44d438a..7aaff5a04ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -178,9 +178,9 @@ public class TestSafeMode { final NameNode nn = cluster.getNameNode(); String status = nn.getNamesystem().getSafemode(); - assertEquals("Safe mode is ON.The reported blocks 0 needs additional " + - "15 blocks to reach the threshold 0.9990 of total blocks 15. " + - "Safe mode will be turned off automatically.", status); + assertEquals("Safe mode is ON. The reported blocks 0 needs additional " + + "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" + + "Safe mode will be turned off automatically", status); assertFalse("Mis-replicated block queues should not be initialized " + "until threshold is crossed", NameNodeAdapter.safeModeInitializedReplQueues(nn)); @@ -353,10 +353,10 @@ public class TestSafeMode { fs = cluster.getFileSystem(); String tipMsg = cluster.getNamesystem().getSafemode(); - assertTrue("Safemode tip message looks right: " + tipMsg, + assertTrue("Safemode tip message doesn't look right: " + tipMsg, tipMsg.contains("The number of live datanodes 0 needs an additional " + - "1 live datanodes to reach the minimum number 1. " + - "Safe mode will be turned off automatically.")); + "1 live datanodes to reach the minimum number 1.\n" + + "Safe mode will be turned off automatically")); // Start a datanode cluster.startDataNodes(conf, 1, true, null, null); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 917504eca43..309b4d0f74e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -206,11 +206,11 @@ public class TestHASafeMode { // We expect it not to be stuck in safemode, since those blocks // that are already visible to the SBN should be processed // in the initial block reports. - assertSafeMode(nn1, 3, 3); + assertSafeMode(nn1, 3, 3, 3, 0); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 8, 8); + assertSafeMode(nn1, 8, 8, 3, 0); } /** @@ -230,7 +230,7 @@ public class TestHASafeMode { banner("Restarting standby"); restartStandby(); - assertSafeMode(nn1, 3, 3); + assertSafeMode(nn1, 3, 3, 3, 0); // Create a few blocks which will send blockReceived calls to the // SBN. @@ -241,7 +241,7 @@ public class TestHASafeMode { banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 8, 8); + assertSafeMode(nn1, 8, 8, 3, 0); } /** @@ -281,11 +281,11 @@ public class TestHASafeMode { banner("Restarting standby"); restartStandby(); - assertSafeMode(nn1, 0, 5); + assertSafeMode(nn1, 0, 5, 3, 0); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } /** @@ -307,7 +307,7 @@ public class TestHASafeMode { restartStandby(); // It will initially have all of the blocks necessary. - assertSafeMode(nn1, 10, 10); + assertSafeMode(nn1, 10, 10, 3, 0); // Delete those blocks while the SBN is in safe mode. // This doesn't affect the SBN, since deletions are not @@ -322,14 +322,14 @@ public class TestHASafeMode { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - assertSafeMode(nn1, 10, 10); + assertSafeMode(nn1, 10, 10, 3, 0); // When we catch up to active namespace, it will restore back // to 0 blocks. banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } /** @@ -355,20 +355,20 @@ public class TestHASafeMode { restartStandby(); // It will initially have all of the blocks necessary. - assertSafeMode(nn1, 5, 5); + assertSafeMode(nn1, 5, 5, 3, 0); // Append to a block while SBN is in safe mode. This should // not affect safemode initially, since the DN message // will get queued. FSDataOutputStream stm = fs.append(new Path("/test")); try { - assertSafeMode(nn1, 5, 5); + assertSafeMode(nn1, 5, 5, 3, 0); // if we roll edits now, the SBN should see that it's under construction // and change its total count and safe count down by one, since UC // blocks are not counted by safe mode. HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 4, 4); + assertSafeMode(nn1, 4, 4, 3, 0); } finally { IOUtils.closeStream(stm); } @@ -386,13 +386,13 @@ public class TestHASafeMode { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - assertSafeMode(nn1, 4, 4); + assertSafeMode(nn1, 4, 4, 3, 0); // When we roll the edit log, the deletions will go through. banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } /** @@ -424,20 +424,21 @@ public class TestHASafeMode { restartActive(); } - private void assertSafeMode(NameNode nn, int safe, int total) { - String status = nn1.getNamesystem().getSafemode(); + private static void assertSafeMode(NameNode nn, int safe, int total, + int numNodes, int nodeThresh) { + String status = nn.getNamesystem().getSafemode(); if (safe == total) { assertTrue("Bad safemode status: '" + status + "'", status.startsWith( - "Safe mode is ON." + - "The reported blocks " + safe + " has reached the threshold " + - "0.9990 of total blocks " + total + ". Safe mode will be " + - "turned off automatically")); + "Safe mode is ON. The reported blocks " + safe + " has reached the " + + "threshold 0.9990 of total blocks " + total + ". The number of " + + "live datanodes " + numNodes + " has reached the minimum number " + + nodeThresh + ". Safe mode will be turned off automatically")); } else { int additional = total - safe; assertTrue("Bad safemode status: '" + status + "'", status.startsWith( - "Safe mode is ON." + + "Safe mode is ON. " + "The reported blocks " + safe + " needs additional " + additional + " blocks")); } @@ -467,14 +468,14 @@ public class TestHASafeMode { // We expect it to be on its way out of safemode, since all of the blocks // from the edit log have been reported. - assertSafeMode(nn1, 3, 3); + assertSafeMode(nn1, 3, 3, 3, 0); // Initiate a failover into it while it's in safemode banner("Initiating a failover into NN1 in safemode"); NameNodeAdapter.abortEditLogs(nn0); cluster.transitionToActive(1); - assertSafeMode(nn1, 5, 5); + assertSafeMode(nn1, 5, 5, 3, 0); } /** @@ -499,10 +500,11 @@ public class TestHASafeMode { // It will initially have all of the blocks necessary. String status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 10 has reached the threshold 0.9990 of " + - "total blocks 10. Safe mode will be turned off automatically")); + status.startsWith( + "Safe mode is ON. The reported blocks 10 has reached the threshold " + + "0.9990 of total blocks 10. The number of live datanodes 3 has " + + "reached the minimum number 0. Safe mode will be turned off " + + "automatically")); // Delete those blocks while the SBN is in safe mode. // Immediately roll the edit log before the actual deletions are sent @@ -512,7 +514,7 @@ public class TestHASafeMode { HATestUtil.waitForStandbyToCatchUp(nn0, nn1); // Should see removal of the blocks as well as their contribution to safe block count. - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); banner("Triggering sending deletions to DNs and Deletion Reports"); @@ -525,7 +527,7 @@ public class TestHASafeMode { // No change in assertion status here, but some of the consistency checks // in safemode will fire here if we accidentally decrement safe block count // below 0. - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } @@ -561,11 +563,11 @@ public class TestHASafeMode { banner("Restarting SBN"); restartStandby(); - assertSafeMode(nn1, 10, 10); + assertSafeMode(nn1, 10, 10, 3, 0); banner("Allowing SBN to catch up"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 15, 15); + assertSafeMode(nn1, 15, 15, 3, 0); } /** @@ -593,7 +595,7 @@ public class TestHASafeMode { nn0.getRpcServer().rollEditLog(); restartStandby(); - assertSafeMode(nn1, 6, 6); + assertSafeMode(nn1, 6, 6, 3, 0); } /**