diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 0dae4ed5d38..c9a4feee52d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -434,6 +434,9 @@ Release 2.1.0-beta - 2013-07-02
HDFS-4944. WebHDFS cannot create a file path containing characters that must
be URI-encoded, such as space. (cnauroth)
+ HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via
+ kihwal)
+
BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS
HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 34d33b5a01b..7e5a3298799 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4014,9 +4014,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// internal fields
/** Time when threshold was reached.
- *
- * <br>-1 safe mode is off
- * <br> 0 safe mode is on, but threshold is not reached yet
+ * <br> -1 safe mode is off
+ * <br> 0 safe mode is on, and threshold is not reached yet
+ * <br> >0 safe mode is on, but we are in extension period
*/
private long reached = -1;
/** Total number of blocks. */
@@ -4140,7 +4140,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
+ timeInSafemode/1000 + " secs");
NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
-
+
+ //Log the following only once (when transitioning from ON -> OFF)
if (reached >= 0) {
NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
}
@@ -4321,62 +4322,56 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* A tip on how safe mode is to be turned off: manually or automatically.
*/
String getTurnOffTip() {
- if(reached < 0)
+ if(!isOn())
return "Safe mode is OFF.";
- String leaveMsg = "";
+
+ //Manual OR low-resource safemode. (Admin intervention required)
+ String leaveMsg = "It was turned on manually. ";
if (areResourcesLow()) {
- leaveMsg = "Resources are low on NN. "
- + "Please add or free up more resources then turn off safe mode manually. "
- + "NOTE: If you turn off safe mode before adding resources, "
- + "the NN will immediately return to safe mode.";
- } else {
- leaveMsg = "Safe mode will be turned off automatically";
+ leaveMsg = "Resources are low on NN. Please add or free up more "
+ + "resources then turn off safe mode manually. NOTE: If you turn off"
+ + " safe mode before adding resources, "
+ + "the NN will immediately return to safe mode. ";
}
- if(isManual() && !areResourcesLow()) {
- leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off";
+ if (isManual() || areResourcesLow()) {
+ return leaveMsg
+ + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
}
- if(blockTotal < 0)
- return leaveMsg + ".";
-
+ //Automatic safemode. System will come out of safemode automatically.
+ leaveMsg = "Safe mode will be turned off automatically";
int numLive = getNumLiveDataNodes();
String msg = "";
if (reached == 0) {
if (blockSafe < blockThreshold) {
msg += String.format(
"The reported blocks %d needs additional %d"
- + " blocks to reach the threshold %.4f of total blocks %d.",
+ + " blocks to reach the threshold %.4f of total blocks %d.\n",
blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
}
if (numLive < datanodeThreshold) {
- if (!"".equals(msg)) {
- msg += "\n";
- }
msg += String.format(
"The number of live datanodes %d needs an additional %d live "
- + "datanodes to reach the minimum number %d.",
+ + "datanodes to reach the minimum number %d.\n",
numLive, (datanodeThreshold - numLive), datanodeThreshold);
}
- msg += " " + leaveMsg;
} else {
msg = String.format("The reported blocks %d has reached the threshold"
- + " %.4f of total blocks %d.", blockSafe, threshold,
- blockTotal);
+ + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
- if (datanodeThreshold > 0) {
- msg += String.format(" The number of live datanodes %d has reached "
- + "the minimum number %d.",
+ msg += String.format("The number of live datanodes %d has reached "
+ + "the minimum number %d. ",
numLive, datanodeThreshold);
- }
- msg += " " + leaveMsg;
}
+ msg += leaveMsg;
// threshold is not reached or manual or resources low
if(reached == 0 || (isManual() && !areResourcesLow())) {
- return msg + ".";
+ return msg;
}
// extension period is in progress
- return msg + " in " + Math.abs(reached + extension - now()) / 1000
- + " seconds.";
+ return msg + (reached + extension - now() > 0 ?
+ " in " + (reached + extension - now()) / 1000 + " seconds."
+ : " soon.");
}
/**
@@ -5631,7 +5626,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
public String getSafemode() {
if (!this.isInSafeMode())
return "";
- return "Safe mode is ON." + this.getSafeModeTip();
+ return "Safe mode is ON. " + this.getSafeModeTip();
}
@Override // NameNodeMXBean
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
index 794b44d438a..7aaff5a04ee 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
@@ -178,9 +178,9 @@ public class TestSafeMode {
final NameNode nn = cluster.getNameNode();
String status = nn.getNamesystem().getSafemode();
- assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
- "15 blocks to reach the threshold 0.9990 of total blocks 15. " +
- "Safe mode will be turned off automatically.", status);
+ assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
+ "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
+ "Safe mode will be turned off automatically", status);
assertFalse("Mis-replicated block queues should not be initialized " +
"until threshold is crossed",
NameNodeAdapter.safeModeInitializedReplQueues(nn));
@@ -353,10 +353,10 @@ public class TestSafeMode {
fs = cluster.getFileSystem();
String tipMsg = cluster.getNamesystem().getSafemode();
- assertTrue("Safemode tip message looks right: " + tipMsg,
+ assertTrue("Safemode tip message doesn't look right: " + tipMsg,
tipMsg.contains("The number of live datanodes 0 needs an additional " +
- "1 live datanodes to reach the minimum number 1. " +
- "Safe mode will be turned off automatically."));
+ "1 live datanodes to reach the minimum number 1.\n" +
+ "Safe mode will be turned off automatically"));
// Start a datanode
cluster.startDataNodes(conf, 1, true, null, null);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
index 917504eca43..309b4d0f74e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
@@ -206,11 +206,11 @@ public class TestHASafeMode {
// We expect it not to be stuck in safemode, since those blocks
// that are already visible to the SBN should be processed
// in the initial block reports.
- assertSafeMode(nn1, 3, 3);
+ assertSafeMode(nn1, 3, 3, 3, 0);
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 8, 8);
+ assertSafeMode(nn1, 8, 8, 3, 0);
}
/**
@@ -230,7 +230,7 @@ public class TestHASafeMode {
banner("Restarting standby");
restartStandby();
- assertSafeMode(nn1, 3, 3);
+ assertSafeMode(nn1, 3, 3, 3, 0);
// Create a few blocks which will send blockReceived calls to the
// SBN.
@@ -241,7 +241,7 @@ public class TestHASafeMode {
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 8, 8);
+ assertSafeMode(nn1, 8, 8, 3, 0);
}
/**
@@ -281,11 +281,11 @@ public class TestHASafeMode {
banner("Restarting standby");
restartStandby();
- assertSafeMode(nn1, 0, 5);
+ assertSafeMode(nn1, 0, 5, 3, 0);
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 0, 0);
+ assertSafeMode(nn1, 0, 0, 3, 0);
}
/**
@@ -307,7 +307,7 @@ public class TestHASafeMode {
restartStandby();
// It will initially have all of the blocks necessary.
- assertSafeMode(nn1, 10, 10);
+ assertSafeMode(nn1, 10, 10, 3, 0);
// Delete those blocks while the SBN is in safe mode.
// This doesn't affect the SBN, since deletions are not
@@ -322,14 +322,14 @@ public class TestHASafeMode {
HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports();
- assertSafeMode(nn1, 10, 10);
+ assertSafeMode(nn1, 10, 10, 3, 0);
// When we catch up to active namespace, it will restore back
// to 0 blocks.
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 0, 0);
+ assertSafeMode(nn1, 0, 0, 3, 0);
}
/**
@@ -355,20 +355,20 @@ public class TestHASafeMode {
restartStandby();
// It will initially have all of the blocks necessary.
- assertSafeMode(nn1, 5, 5);
+ assertSafeMode(nn1, 5, 5, 3, 0);
// Append to a block while SBN is in safe mode. This should
// not affect safemode initially, since the DN message
// will get queued.
FSDataOutputStream stm = fs.append(new Path("/test"));
try {
- assertSafeMode(nn1, 5, 5);
+ assertSafeMode(nn1, 5, 5, 3, 0);
// if we roll edits now, the SBN should see that it's under construction
// and change its total count and safe count down by one, since UC
// blocks are not counted by safe mode.
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 4, 4);
+ assertSafeMode(nn1, 4, 4, 3, 0);
} finally {
IOUtils.closeStream(stm);
}
@@ -386,13 +386,13 @@ public class TestHASafeMode {
HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports();
- assertSafeMode(nn1, 4, 4);
+ assertSafeMode(nn1, 4, 4, 3, 0);
// When we roll the edit log, the deletions will go through.
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 0, 0);
+ assertSafeMode(nn1, 0, 0, 3, 0);
}
/**
@@ -424,20 +424,21 @@ public class TestHASafeMode {
restartActive();
}
- private void assertSafeMode(NameNode nn, int safe, int total) {
- String status = nn1.getNamesystem().getSafemode();
+ private static void assertSafeMode(NameNode nn, int safe, int total,
+ int numNodes, int nodeThresh) {
+ String status = nn.getNamesystem().getSafemode();
if (safe == total) {
assertTrue("Bad safemode status: '" + status + "'",
status.startsWith(
- "Safe mode is ON." +
- "The reported blocks " + safe + " has reached the threshold " +
- "0.9990 of total blocks " + total + ". Safe mode will be " +
- "turned off automatically"));
+ "Safe mode is ON. The reported blocks " + safe + " has reached the "
+ + "threshold 0.9990 of total blocks " + total + ". The number of "
+ + "live datanodes " + numNodes + " has reached the minimum number "
+ + nodeThresh + ". Safe mode will be turned off automatically"));
} else {
int additional = total - safe;
assertTrue("Bad safemode status: '" + status + "'",
status.startsWith(
- "Safe mode is ON." +
+ "Safe mode is ON. " +
"The reported blocks " + safe + " needs additional " +
additional + " blocks"));
}
@@ -467,14 +468,14 @@ public class TestHASafeMode {
// We expect it to be on its way out of safemode, since all of the blocks
// from the edit log have been reported.
- assertSafeMode(nn1, 3, 3);
+ assertSafeMode(nn1, 3, 3, 3, 0);
// Initiate a failover into it while it's in safemode
banner("Initiating a failover into NN1 in safemode");
NameNodeAdapter.abortEditLogs(nn0);
cluster.transitionToActive(1);
- assertSafeMode(nn1, 5, 5);
+ assertSafeMode(nn1, 5, 5, 3, 0);
}
/**
@@ -499,10 +500,11 @@ public class TestHASafeMode {
// It will initially have all of the blocks necessary.
String status = nn1.getNamesystem().getSafemode();
assertTrue("Bad safemode status: '" + status + "'",
- status.startsWith(
- "Safe mode is ON." +
- "The reported blocks 10 has reached the threshold 0.9990 of " +
- "total blocks 10. Safe mode will be turned off automatically"));
+ status.startsWith(
+ "Safe mode is ON. The reported blocks 10 has reached the threshold "
+ + "0.9990 of total blocks 10. The number of live datanodes 3 has "
+ + "reached the minimum number 0. Safe mode will be turned off "
+ + "automatically"));
// Delete those blocks while the SBN is in safe mode.
// Immediately roll the edit log before the actual deletions are sent
@@ -512,7 +514,7 @@ public class TestHASafeMode {
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
// Should see removal of the blocks as well as their contribution to safe block count.
- assertSafeMode(nn1, 0, 0);
+ assertSafeMode(nn1, 0, 0, 3, 0);
banner("Triggering sending deletions to DNs and Deletion Reports");
@@ -525,7 +527,7 @@ public class TestHASafeMode {
// No change in assertion status here, but some of the consistency checks
// in safemode will fire here if we accidentally decrement safe block count
// below 0.
- assertSafeMode(nn1, 0, 0);
+ assertSafeMode(nn1, 0, 0, 3, 0);
}
@@ -561,11 +563,11 @@ public class TestHASafeMode {
banner("Restarting SBN");
restartStandby();
- assertSafeMode(nn1, 10, 10);
+ assertSafeMode(nn1, 10, 10, 3, 0);
banner("Allowing SBN to catch up");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
- assertSafeMode(nn1, 15, 15);
+ assertSafeMode(nn1, 15, 15, 3, 0);
}
/**
@@ -593,7 +595,7 @@ public class TestHASafeMode {
nn0.getRpcServer().rollEditLog();
restartStandby();
- assertSafeMode(nn1, 6, 6);
+ assertSafeMode(nn1, 6, 6, 3, 0);
}
/**