From 8891c7d08b209beb63b1da1cb07c71f25aa85f17 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Fri, 21 Mar 2014 20:57:44 +0000 Subject: [PATCH] HDFS-6120. Merging r1580047 from trunk to branch-2. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1580048 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 84 +++++++++++-------- .../org/apache/hadoop/hdfs/TestSafeMode.java | 4 +- .../server/namenode/ha/TestHASafeMode.java | 7 +- 4 files changed, 56 insertions(+), 41 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index a3d2423930c..94a62d7a09e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -186,6 +186,8 @@ Release 2.4.0 - UNRELEASED HDFS-6138. Add a user guide for how to use viewfs with federation. (sanjay and szetszwo via szetszwo) + HDFS-6120. Fix and improve safe mode log messages. (Arpit Agarwal) + OPTIMIZATIONS HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index dd81bc203dd..d9e8faa343e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -91,7 +91,6 @@ import java.io.ByteArrayInputStream; import java.io.DataInput; import java.io.DataInputStream; -import java.io.DataOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -4803,13 +4802,21 @@ private synchronized boolean canInitializeReplQueues() { * @return true if can leave or false otherwise. */ private synchronized boolean canLeave() { - if (reached == 0) - return false; - if (now() - reached < extension) { - reportStatus("STATE* Safe mode ON.", false); + if (reached == 0) { return false; } - return !needEnter(); + + if (now() - reached < extension) { + reportStatus("STATE* Safe mode ON, in safe mode extension.", false); + return false; + } + + if (needEnter()) { + reportStatus("STATE* Safe mode ON, thresholds not met.", false); + return false; + } + + return true; } /** @@ -4953,56 +4960,59 @@ private void setResourcesLow() { * A tip on how safe mode is to be turned off: manually or automatically. */ String getTurnOffTip() { - if(!isOn()) + if(!isOn()) { return "Safe mode is OFF."; + } //Manual OR low-resource safemode. (Admin intervention required) - String leaveMsg = "It was turned on manually. "; + String adminMsg = "It was turned on manually. "; if (areResourcesLow()) { - leaveMsg = "Resources are low on NN. Please add or free up more " + adminMsg = "Resources are low on NN. Please add or free up more " + "resources then turn off safe mode manually. NOTE: If you turn off" + " safe mode before adding resources, " + "the NN will immediately return to safe mode. "; } if (isManual() || areResourcesLow()) { - return leaveMsg + return adminMsg + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; } - //Automatic safemode. System will come out of safemode automatically. - leaveMsg = "Safe mode will be turned off automatically"; + boolean thresholdsMet = true; int numLive = getNumLiveDataNodes(); String msg = ""; - if (reached == 0) { - if (blockSafe < blockThreshold) { - msg += String.format( - "The reported blocks %d needs additional %d" - + " blocks to reach the threshold %.4f of total blocks %d.\n", - blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); - } - if (numLive < datanodeThreshold) { - msg += String.format( - "The number of live datanodes %d needs an additional %d live " - + "datanodes to reach the minimum number %d.\n", - numLive, (datanodeThreshold - numLive), datanodeThreshold); - } + if (blockSafe < blockThreshold) { + msg += String.format( + "The reported blocks %d needs additional %d" + + " blocks to reach the threshold %.4f of total blocks %d.\n", + blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); + thresholdsMet = false; } else { - msg = String.format("The reported blocks %d has reached the threshold" + msg += String.format("The reported blocks %d has reached the threshold" + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); - + } + if (numLive < datanodeThreshold) { + msg += String.format( + "The number of live datanodes %d needs an additional %d live " + + "datanodes to reach the minimum number %d.\n", + numLive, (datanodeThreshold - numLive), datanodeThreshold); + thresholdsMet = false; + } else { msg += String.format("The number of live datanodes %d has reached " - + "the minimum number %d. ", - numLive, datanodeThreshold); + + "the minimum number %d. ", + numLive, datanodeThreshold); } - msg += leaveMsg; - // threshold is not reached or manual or resources low - if(reached == 0 || (isManual() && !areResourcesLow())) { - return msg; + msg += (reached > 0) ? "In safe mode extension. " : ""; + msg += "Safe mode will be turned off automatically "; + + if (!thresholdsMet) { + msg += "once the thresholds have been reached."; + } else if (reached + extension - now() > 0) { + msg += ("in " + (reached + extension - now()) / 1000 + " seconds."); + } else { + msg += "soon."; } - // extension period is in progress - return msg + (reached + extension - now() > 0 ? - " in " + (reached + extension - now()) / 1000 + " seconds." - : " soon."); + + return msg; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index 59e66b413a0..c98cb8992d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -182,7 +182,9 @@ public void testInitializeReplQueuesEarly() throws Exception { String status = nn.getNamesystem().getSafemode(); assertEquals("Safe mode is ON. The reported blocks 0 needs additional " + "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" + - "Safe mode will be turned off automatically", status); + "The number of live datanodes 0 has reached the minimum number 0. " + + "Safe mode will be turned off automatically once the thresholds " + + "have been reached.", status); assertFalse("Mis-replicated block queues should not be initialized " + "until threshold is crossed", NameNodeAdapter.safeModeInitializedReplQueues(nn)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 1c7b7f423cf..29b0e34ca95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -495,7 +495,8 @@ private static void assertSafeMode(NameNode nn, int safe, int total, "Safe mode is ON. The reported blocks " + safe + " has reached the " + "threshold 0.9990 of total blocks " + total + ". The number of " + "live datanodes " + numNodes + " has reached the minimum number " - + nodeThresh + ". Safe mode will be turned off automatically")); + + nodeThresh + ". In safe mode extension. " + + "Safe mode will be turned off automatically")); } else { int additional = total - safe; assertTrue("Bad safemode status: '" + status + "'", @@ -565,8 +566,8 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception status.startsWith( "Safe mode is ON. The reported blocks 10 has reached the threshold " + "0.9990 of total blocks 10. The number of live datanodes 3 has " - + "reached the minimum number 0. Safe mode will be turned off " - + "automatically")); + + "reached the minimum number 0. In safe mode extension. " + + "Safe mode will be turned off automatically")); // Delete those blocks while the SBN is in safe mode. // Immediately roll the edit log before the actual deletions are sent