HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. Contributed by Ravi Prakash.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1498665 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kihwal Lee 2013-07-01 20:20:27 +00:00
parent aa7e148d1f
commit ead7fa0413
4 changed files with 71 additions and 71 deletions

View File

@@ -637,6 +637,9 @@ Release 2.1.0-beta - 2013-07-02
HDFS-4944. WebHDFS cannot create a file path containing characters that must HDFS-4944. WebHDFS cannot create a file path containing characters that must
be URI-encoded, such as space. (cnauroth) be URI-encoded, such as space. (cnauroth)
HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via
kihwal)
BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS
HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.

View File

@@ -4031,9 +4031,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// internal fields // internal fields
/** Time when threshold was reached. /** Time when threshold was reached.
* * <br> -1 safe mode is off
* <br>-1 safe mode is off * <br> 0 safe mode is on, and threshold is not reached yet
* <br> 0 safe mode is on, but threshold is not reached yet * <br> >0 safe mode is on, but we are in extension period
*/ */
private long reached = -1; private long reached = -1;
/** Total number of blocks. */ /** Total number of blocks. */
@@ -4157,7 +4157,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
NameNode.stateChangeLog.info("STATE* Leaving safe mode after " NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
+ timeInSafemode/1000 + " secs"); + timeInSafemode/1000 + " secs");
NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode); NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
//Log the following only once (when transitioning from ON -> OFF)
if (reached >= 0) { if (reached >= 0) {
NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
} }
@@ -4338,62 +4339,56 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* A tip on how safe mode is to be turned off: manually or automatically. * A tip on how safe mode is to be turned off: manually or automatically.
*/ */
String getTurnOffTip() { String getTurnOffTip() {
if(reached < 0) if(!isOn())
return "Safe mode is OFF."; return "Safe mode is OFF.";
String leaveMsg = "";
//Manual OR low-resource safemode. (Admin intervention required)
String leaveMsg = "It was turned on manually. ";
if (areResourcesLow()) { if (areResourcesLow()) {
leaveMsg = "Resources are low on NN. " leaveMsg = "Resources are low on NN. Please add or free up more "
+ "Please add or free up more resources then turn off safe mode manually. " + "resources then turn off safe mode manually. NOTE: If you turn off"
+ "NOTE: If you turn off safe mode before adding resources, " + " safe mode before adding resources, "
+ "the NN will immediately return to safe mode."; + "the NN will immediately return to safe mode. ";
} else {
leaveMsg = "Safe mode will be turned off automatically";
} }
if(isManual() && !areResourcesLow()) { if (isManual() || areResourcesLow()) {
leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off"; return leaveMsg
+ "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
} }
if(blockTotal < 0) //Automatic safemode. System will come out of safemode automatically.
return leaveMsg + "."; leaveMsg = "Safe mode will be turned off automatically";
int numLive = getNumLiveDataNodes(); int numLive = getNumLiveDataNodes();
String msg = ""; String msg = "";
if (reached == 0) { if (reached == 0) {
if (blockSafe < blockThreshold) { if (blockSafe < blockThreshold) {
msg += String.format( msg += String.format(
"The reported blocks %d needs additional %d" "The reported blocks %d needs additional %d"
+ " blocks to reach the threshold %.4f of total blocks %d.", + " blocks to reach the threshold %.4f of total blocks %d.\n",
blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
} }
if (numLive < datanodeThreshold) { if (numLive < datanodeThreshold) {
if (!"".equals(msg)) {
msg += "\n";
}
msg += String.format( msg += String.format(
"The number of live datanodes %d needs an additional %d live " "The number of live datanodes %d needs an additional %d live "
+ "datanodes to reach the minimum number %d.", + "datanodes to reach the minimum number %d.\n",
numLive, (datanodeThreshold - numLive), datanodeThreshold); numLive, (datanodeThreshold - numLive), datanodeThreshold);
} }
msg += " " + leaveMsg;
} else { } else {
msg = String.format("The reported blocks %d has reached the threshold" msg = String.format("The reported blocks %d has reached the threshold"
+ " %.4f of total blocks %d.", blockSafe, threshold, + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
blockTotal);
if (datanodeThreshold > 0) { msg += String.format("The number of live datanodes %d has reached "
msg += String.format(" The number of live datanodes %d has reached " + "the minimum number %d. ",
+ "the minimum number %d.",
numLive, datanodeThreshold); numLive, datanodeThreshold);
}
msg += " " + leaveMsg;
} }
msg += leaveMsg;
// threshold is not reached or manual or resources low // threshold is not reached or manual or resources low
if(reached == 0 || (isManual() && !areResourcesLow())) { if(reached == 0 || (isManual() && !areResourcesLow())) {
return msg + "."; return msg;
} }
// extension period is in progress // extension period is in progress
return msg + " in " + Math.abs(reached + extension - now()) / 1000 return msg + (reached + extension - now() > 0 ?
+ " seconds."; " in " + (reached + extension - now()) / 1000 + " seconds."
: " soon.");
} }
/** /**
@@ -5648,7 +5643,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
public String getSafemode() { public String getSafemode() {
if (!this.isInSafeMode()) if (!this.isInSafeMode())
return ""; return "";
return "Safe mode is ON." + this.getSafeModeTip(); return "Safe mode is ON. " + this.getSafeModeTip();
} }
@Override // NameNodeMXBean @Override // NameNodeMXBean

View File

@@ -178,9 +178,9 @@ public class TestSafeMode {
final NameNode nn = cluster.getNameNode(); final NameNode nn = cluster.getNameNode();
String status = nn.getNamesystem().getSafemode(); String status = nn.getNamesystem().getSafemode();
assertEquals("Safe mode is ON.The reported blocks 0 needs additional " + assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
"15 blocks to reach the threshold 0.9990 of total blocks 15. " + "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
"Safe mode will be turned off automatically.", status); "Safe mode will be turned off automatically", status);
assertFalse("Mis-replicated block queues should not be initialized " + assertFalse("Mis-replicated block queues should not be initialized " +
"until threshold is crossed", "until threshold is crossed",
NameNodeAdapter.safeModeInitializedReplQueues(nn)); NameNodeAdapter.safeModeInitializedReplQueues(nn));
@@ -353,10 +353,10 @@ public class TestSafeMode {
fs = cluster.getFileSystem(); fs = cluster.getFileSystem();
String tipMsg = cluster.getNamesystem().getSafemode(); String tipMsg = cluster.getNamesystem().getSafemode();
assertTrue("Safemode tip message looks right: " + tipMsg, assertTrue("Safemode tip message doesn't look right: " + tipMsg,
tipMsg.contains("The number of live datanodes 0 needs an additional " + tipMsg.contains("The number of live datanodes 0 needs an additional " +
"1 live datanodes to reach the minimum number 1. " + "1 live datanodes to reach the minimum number 1.\n" +
"Safe mode will be turned off automatically.")); "Safe mode will be turned off automatically"));
// Start a datanode // Start a datanode
cluster.startDataNodes(conf, 1, true, null, null); cluster.startDataNodes(conf, 1, true, null, null);

View File

@@ -206,11 +206,11 @@ public class TestHASafeMode {
// We expect it not to be stuck in safemode, since those blocks // We expect it not to be stuck in safemode, since those blocks
// that are already visible to the SBN should be processed // that are already visible to the SBN should be processed
// in the initial block reports. // in the initial block reports.
assertSafeMode(nn1, 3, 3); assertSafeMode(nn1, 3, 3, 3, 0);
banner("Waiting for standby to catch up to active namespace"); banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 8, 8); assertSafeMode(nn1, 8, 8, 3, 0);
} }
/** /**
@@ -230,7 +230,7 @@ public class TestHASafeMode {
banner("Restarting standby"); banner("Restarting standby");
restartStandby(); restartStandby();
assertSafeMode(nn1, 3, 3); assertSafeMode(nn1, 3, 3, 3, 0);
// Create a few blocks which will send blockReceived calls to the // Create a few blocks which will send blockReceived calls to the
// SBN. // SBN.
@@ -241,7 +241,7 @@ public class TestHASafeMode {
banner("Waiting for standby to catch up to active namespace"); banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 8, 8); assertSafeMode(nn1, 8, 8, 3, 0);
} }
/** /**
@@ -281,11 +281,11 @@ public class TestHASafeMode {
banner("Restarting standby"); banner("Restarting standby");
restartStandby(); restartStandby();
assertSafeMode(nn1, 0, 5); assertSafeMode(nn1, 0, 5, 3, 0);
banner("Waiting for standby to catch up to active namespace"); banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0); assertSafeMode(nn1, 0, 0, 3, 0);
} }
/** /**
@@ -307,7 +307,7 @@ public class TestHASafeMode {
restartStandby(); restartStandby();
// It will initially have all of the blocks necessary. // It will initially have all of the blocks necessary.
assertSafeMode(nn1, 10, 10); assertSafeMode(nn1, 10, 10, 3, 0);
// Delete those blocks while the SBN is in safe mode. // Delete those blocks while the SBN is in safe mode.
// This doesn't affect the SBN, since deletions are not // This doesn't affect the SBN, since deletions are not
@@ -322,14 +322,14 @@ public class TestHASafeMode {
HATestUtil.waitForDNDeletions(cluster); HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports(); cluster.triggerDeletionReports();
assertSafeMode(nn1, 10, 10); assertSafeMode(nn1, 10, 10, 3, 0);
// When we catch up to active namespace, it will restore back // When we catch up to active namespace, it will restore back
// to 0 blocks. // to 0 blocks.
banner("Waiting for standby to catch up to active namespace"); banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0); assertSafeMode(nn1, 0, 0, 3, 0);
} }
/** /**
@@ -355,20 +355,20 @@ public class TestHASafeMode {
restartStandby(); restartStandby();
// It will initially have all of the blocks necessary. // It will initially have all of the blocks necessary.
assertSafeMode(nn1, 5, 5); assertSafeMode(nn1, 5, 5, 3, 0);
// Append to a block while SBN is in safe mode. This should // Append to a block while SBN is in safe mode. This should
// not affect safemode initially, since the DN message // not affect safemode initially, since the DN message
// will get queued. // will get queued.
FSDataOutputStream stm = fs.append(new Path("/test")); FSDataOutputStream stm = fs.append(new Path("/test"));
try { try {
assertSafeMode(nn1, 5, 5); assertSafeMode(nn1, 5, 5, 3, 0);
// if we roll edits now, the SBN should see that it's under construction // if we roll edits now, the SBN should see that it's under construction
// and change its total count and safe count down by one, since UC // and change its total count and safe count down by one, since UC
// blocks are not counted by safe mode. // blocks are not counted by safe mode.
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 4, 4); assertSafeMode(nn1, 4, 4, 3, 0);
} finally { } finally {
IOUtils.closeStream(stm); IOUtils.closeStream(stm);
} }
@@ -386,13 +386,13 @@ public class TestHASafeMode {
HATestUtil.waitForDNDeletions(cluster); HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports(); cluster.triggerDeletionReports();
assertSafeMode(nn1, 4, 4); assertSafeMode(nn1, 4, 4, 3, 0);
// When we roll the edit log, the deletions will go through. // When we roll the edit log, the deletions will go through.
banner("Waiting for standby to catch up to active namespace"); banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0); assertSafeMode(nn1, 0, 0, 3, 0);
} }
/** /**
@@ -424,20 +424,21 @@ public class TestHASafeMode {
restartActive(); restartActive();
} }
private static void assertSafeMode(NameNode nn, int safe, int total) { private static void assertSafeMode(NameNode nn, int safe, int total,
int numNodes, int nodeThresh) {
String status = nn.getNamesystem().getSafemode(); String status = nn.getNamesystem().getSafemode();
if (safe == total) { if (safe == total) {
assertTrue("Bad safemode status: '" + status + "'", assertTrue("Bad safemode status: '" + status + "'",
status.startsWith( status.startsWith(
"Safe mode is ON." + "Safe mode is ON. The reported blocks " + safe + " has reached the "
"The reported blocks " + safe + " has reached the threshold " + + "threshold 0.9990 of total blocks " + total + ". The number of "
"0.9990 of total blocks " + total + ". Safe mode will be " + + "live datanodes " + numNodes + " has reached the minimum number "
"turned off automatically")); + nodeThresh + ". Safe mode will be turned off automatically"));
} else { } else {
int additional = total - safe; int additional = total - safe;
assertTrue("Bad safemode status: '" + status + "'", assertTrue("Bad safemode status: '" + status + "'",
status.startsWith( status.startsWith(
"Safe mode is ON." + "Safe mode is ON. " +
"The reported blocks " + safe + " needs additional " + "The reported blocks " + safe + " needs additional " +
additional + " blocks")); additional + " blocks"));
} }
@@ -467,14 +468,14 @@ public class TestHASafeMode {
// We expect it to be on its way out of safemode, since all of the blocks // We expect it to be on its way out of safemode, since all of the blocks
// from the edit log have been reported. // from the edit log have been reported.
assertSafeMode(nn1, 3, 3); assertSafeMode(nn1, 3, 3, 3, 0);
// Initiate a failover into it while it's in safemode // Initiate a failover into it while it's in safemode
banner("Initiating a failover into NN1 in safemode"); banner("Initiating a failover into NN1 in safemode");
NameNodeAdapter.abortEditLogs(nn0); NameNodeAdapter.abortEditLogs(nn0);
cluster.transitionToActive(1); cluster.transitionToActive(1);
assertSafeMode(nn1, 5, 5); assertSafeMode(nn1, 5, 5, 3, 0);
} }
/** /**
@@ -499,10 +500,11 @@ public class TestHASafeMode {
// It will initially have all of the blocks necessary. // It will initially have all of the blocks necessary.
String status = nn1.getNamesystem().getSafemode(); String status = nn1.getNamesystem().getSafemode();
assertTrue("Bad safemode status: '" + status + "'", assertTrue("Bad safemode status: '" + status + "'",
status.startsWith( status.startsWith(
"Safe mode is ON." + "Safe mode is ON. The reported blocks 10 has reached the threshold "
"The reported blocks 10 has reached the threshold 0.9990 of " + + "0.9990 of total blocks 10. The number of live datanodes 3 has "
"total blocks 10. Safe mode will be turned off automatically")); + "reached the minimum number 0. Safe mode will be turned off "
+ "automatically"));
// Delete those blocks while the SBN is in safe mode. // Delete those blocks while the SBN is in safe mode.
// Immediately roll the edit log before the actual deletions are sent // Immediately roll the edit log before the actual deletions are sent
@@ -512,7 +514,7 @@ public class TestHASafeMode {
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
// Should see removal of the blocks as well as their contribution to safe block count. // Should see removal of the blocks as well as their contribution to safe block count.
assertSafeMode(nn1, 0, 0); assertSafeMode(nn1, 0, 0, 3, 0);
banner("Triggering sending deletions to DNs and Deletion Reports"); banner("Triggering sending deletions to DNs and Deletion Reports");
@@ -525,7 +527,7 @@ public class TestHASafeMode {
// No change in assertion status here, but some of the consistency checks // No change in assertion status here, but some of the consistency checks
// in safemode will fire here if we accidentally decrement safe block count // in safemode will fire here if we accidentally decrement safe block count
// below 0. // below 0.
assertSafeMode(nn1, 0, 0); assertSafeMode(nn1, 0, 0, 3, 0);
} }
@@ -561,11 +563,11 @@ public class TestHASafeMode {
banner("Restarting SBN"); banner("Restarting SBN");
restartStandby(); restartStandby();
assertSafeMode(nn1, 10, 10); assertSafeMode(nn1, 10, 10, 3, 0);
banner("Allowing SBN to catch up"); banner("Allowing SBN to catch up");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1); HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 15, 15); assertSafeMode(nn1, 15, 15, 3, 0);
} }
/** /**
@@ -593,7 +595,7 @@ public class TestHASafeMode {
nn0.getRpcServer().rollEditLog(); nn0.getRpcServer().rollEditLog();
restartStandby(); restartStandby();
assertSafeMode(nn1, 6, 6); assertSafeMode(nn1, 6, 6, 3, 0);
} }
/** /**