HDFS-6120. Fix and improve safe mode log messages. (Arpit Agarwal)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1580047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2fbd94db3e
commit
fc53af9c4c
|
@ -430,6 +430,8 @@ Release 2.4.0 - UNRELEASED
|
||||||
HDFS-6138. Add a user guide for how to use viewfs with federation.
|
HDFS-6138. Add a user guide for how to use viewfs with federation.
|
||||||
(sanjay and szetszwo via szetszwo)
|
(sanjay and szetszwo via szetszwo)
|
||||||
|
|
||||||
|
HDFS-6120. Fix and improve safe mode log messages. (Arpit Agarwal)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
||||||
|
|
|
@ -91,7 +91,6 @@ import java.io.BufferedWriter;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
import java.io.DataOutputStream;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
|
@ -4823,13 +4822,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
* @return true if can leave or false otherwise.
|
* @return true if can leave or false otherwise.
|
||||||
*/
|
*/
|
||||||
private synchronized boolean canLeave() {
|
private synchronized boolean canLeave() {
|
||||||
if (reached == 0)
|
if (reached == 0) {
|
||||||
return false;
|
|
||||||
if (now() - reached < extension) {
|
|
||||||
reportStatus("STATE* Safe mode ON.", false);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return !needEnter();
|
|
||||||
|
if (now() - reached < extension) {
|
||||||
|
reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (needEnter()) {
|
||||||
|
reportStatus("STATE* Safe mode ON, thresholds not met.", false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -4973,56 +4980,59 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
* A tip on how safe mode is to be turned off: manually or automatically.
|
* A tip on how safe mode is to be turned off: manually or automatically.
|
||||||
*/
|
*/
|
||||||
String getTurnOffTip() {
|
String getTurnOffTip() {
|
||||||
if(!isOn())
|
if(!isOn()) {
|
||||||
return "Safe mode is OFF.";
|
return "Safe mode is OFF.";
|
||||||
|
}
|
||||||
|
|
||||||
//Manual OR low-resource safemode. (Admin intervention required)
|
//Manual OR low-resource safemode. (Admin intervention required)
|
||||||
String leaveMsg = "It was turned on manually. ";
|
String adminMsg = "It was turned on manually. ";
|
||||||
if (areResourcesLow()) {
|
if (areResourcesLow()) {
|
||||||
leaveMsg = "Resources are low on NN. Please add or free up more "
|
adminMsg = "Resources are low on NN. Please add or free up more "
|
||||||
+ "resources then turn off safe mode manually. NOTE: If you turn off"
|
+ "resources then turn off safe mode manually. NOTE: If you turn off"
|
||||||
+ " safe mode before adding resources, "
|
+ " safe mode before adding resources, "
|
||||||
+ "the NN will immediately return to safe mode. ";
|
+ "the NN will immediately return to safe mode. ";
|
||||||
}
|
}
|
||||||
if (isManual() || areResourcesLow()) {
|
if (isManual() || areResourcesLow()) {
|
||||||
return leaveMsg
|
return adminMsg
|
||||||
+ "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
|
+ "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
|
||||||
}
|
}
|
||||||
|
|
||||||
//Automatic safemode. System will come out of safemode automatically.
|
boolean thresholdsMet = true;
|
||||||
leaveMsg = "Safe mode will be turned off automatically";
|
|
||||||
int numLive = getNumLiveDataNodes();
|
int numLive = getNumLiveDataNodes();
|
||||||
String msg = "";
|
String msg = "";
|
||||||
if (reached == 0) {
|
if (blockSafe < blockThreshold) {
|
||||||
if (blockSafe < blockThreshold) {
|
msg += String.format(
|
||||||
msg += String.format(
|
"The reported blocks %d needs additional %d"
|
||||||
"The reported blocks %d needs additional %d"
|
+ " blocks to reach the threshold %.4f of total blocks %d.\n",
|
||||||
+ " blocks to reach the threshold %.4f of total blocks %d.\n",
|
blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
|
||||||
blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
|
thresholdsMet = false;
|
||||||
}
|
|
||||||
if (numLive < datanodeThreshold) {
|
|
||||||
msg += String.format(
|
|
||||||
"The number of live datanodes %d needs an additional %d live "
|
|
||||||
+ "datanodes to reach the minimum number %d.\n",
|
|
||||||
numLive, (datanodeThreshold - numLive), datanodeThreshold);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
msg = String.format("The reported blocks %d has reached the threshold"
|
msg += String.format("The reported blocks %d has reached the threshold"
|
||||||
+ " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
|
+ " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
|
||||||
|
}
|
||||||
|
if (numLive < datanodeThreshold) {
|
||||||
|
msg += String.format(
|
||||||
|
"The number of live datanodes %d needs an additional %d live "
|
||||||
|
+ "datanodes to reach the minimum number %d.\n",
|
||||||
|
numLive, (datanodeThreshold - numLive), datanodeThreshold);
|
||||||
|
thresholdsMet = false;
|
||||||
|
} else {
|
||||||
msg += String.format("The number of live datanodes %d has reached "
|
msg += String.format("The number of live datanodes %d has reached "
|
||||||
+ "the minimum number %d. ",
|
+ "the minimum number %d. ",
|
||||||
numLive, datanodeThreshold);
|
numLive, datanodeThreshold);
|
||||||
}
|
}
|
||||||
msg += leaveMsg;
|
msg += (reached > 0) ? "In safe mode extension. " : "";
|
||||||
// threshold is not reached or manual or resources low
|
msg += "Safe mode will be turned off automatically ";
|
||||||
if(reached == 0 || (isManual() && !areResourcesLow())) {
|
|
||||||
return msg;
|
if (!thresholdsMet) {
|
||||||
|
msg += "once the thresholds have been reached.";
|
||||||
|
} else if (reached + extension - now() > 0) {
|
||||||
|
msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
|
||||||
|
} else {
|
||||||
|
msg += "soon.";
|
||||||
}
|
}
|
||||||
// extension period is in progress
|
|
||||||
return msg + (reached + extension - now() > 0 ?
|
return msg;
|
||||||
" in " + (reached + extension - now()) / 1000 + " seconds."
|
|
||||||
: " soon.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -182,7 +182,9 @@ public class TestSafeMode {
|
||||||
String status = nn.getNamesystem().getSafemode();
|
String status = nn.getNamesystem().getSafemode();
|
||||||
assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
|
assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
|
||||||
"15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
|
"15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
|
||||||
"Safe mode will be turned off automatically", status);
|
"The number of live datanodes 0 has reached the minimum number 0. " +
|
||||||
|
"Safe mode will be turned off automatically once the thresholds " +
|
||||||
|
"have been reached.", status);
|
||||||
assertFalse("Mis-replicated block queues should not be initialized " +
|
assertFalse("Mis-replicated block queues should not be initialized " +
|
||||||
"until threshold is crossed",
|
"until threshold is crossed",
|
||||||
NameNodeAdapter.safeModeInitializedReplQueues(nn));
|
NameNodeAdapter.safeModeInitializedReplQueues(nn));
|
||||||
|
|
|
@ -495,7 +495,8 @@ public class TestHASafeMode {
|
||||||
"Safe mode is ON. The reported blocks " + safe + " has reached the "
|
"Safe mode is ON. The reported blocks " + safe + " has reached the "
|
||||||
+ "threshold 0.9990 of total blocks " + total + ". The number of "
|
+ "threshold 0.9990 of total blocks " + total + ". The number of "
|
||||||
+ "live datanodes " + numNodes + " has reached the minimum number "
|
+ "live datanodes " + numNodes + " has reached the minimum number "
|
||||||
+ nodeThresh + ". Safe mode will be turned off automatically"));
|
+ nodeThresh + ". In safe mode extension. "
|
||||||
|
+ "Safe mode will be turned off automatically"));
|
||||||
} else {
|
} else {
|
||||||
int additional = total - safe;
|
int additional = total - safe;
|
||||||
assertTrue("Bad safemode status: '" + status + "'",
|
assertTrue("Bad safemode status: '" + status + "'",
|
||||||
|
@ -565,8 +566,8 @@ public class TestHASafeMode {
|
||||||
status.startsWith(
|
status.startsWith(
|
||||||
"Safe mode is ON. The reported blocks 10 has reached the threshold "
|
"Safe mode is ON. The reported blocks 10 has reached the threshold "
|
||||||
+ "0.9990 of total blocks 10. The number of live datanodes 3 has "
|
+ "0.9990 of total blocks 10. The number of live datanodes 3 has "
|
||||||
+ "reached the minimum number 0. Safe mode will be turned off "
|
+ "reached the minimum number 0. In safe mode extension. "
|
||||||
+ "automatically"));
|
+ "Safe mode will be turned off automatically"));
|
||||||
|
|
||||||
// Delete those blocks while the SBN is in safe mode.
|
// Delete those blocks while the SBN is in safe mode.
|
||||||
// Immediately roll the edit log before the actual deletions are sent
|
// Immediately roll the edit log before the actual deletions are sent
|
||||||
|
|
Loading…
Reference in New Issue