HDFS-2914. HA: Standby should not enter safemode when resources are low. Contributed by Vinay.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1347898 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fb52b5ec9e
commit
4757836b75
|
@ -165,6 +165,8 @@ Release 2.0.1-alpha - UNRELEASED
|
|||
HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis
|
||||
jumps (Andy Isaacson via todd)
|
||||
|
||||
HDFS-2914. HA: Standby should not enter safemode when resources are low. (Vinay via atm)
|
||||
|
||||
BREAKDOWN OF HDFS-3042 SUBTASKS
|
||||
|
||||
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
|
||||
|
|
|
@ -557,8 +557,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
!safeMode.isPopulatingReplQueues();
|
||||
setBlockTotal();
|
||||
blockManager.activate(conf);
|
||||
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
|
||||
nnrmthread.start();
|
||||
} finally {
|
||||
writeUnlock();
|
||||
}
|
||||
|
@ -575,7 +573,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
writeLock();
|
||||
try {
|
||||
if (blockManager != null) blockManager.close();
|
||||
if (nnrmthread != null) nnrmthread.interrupt();
|
||||
} finally {
|
||||
writeUnlock();
|
||||
}
|
||||
|
@ -629,6 +626,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
}
|
||||
leaseManager.startMonitor();
|
||||
startSecretManagerIfNecessary();
|
||||
|
||||
//ResourceMonitor required only at ActiveNN. See HDFS-2914
|
||||
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
|
||||
nnrmthread.start();
|
||||
} finally {
|
||||
writeUnlock();
|
||||
}
|
||||
|
@ -651,6 +652,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
if (leaseManager != null) {
|
||||
leaseManager.stopMonitor();
|
||||
}
|
||||
if (nnrmthread != null) {
|
||||
((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
|
||||
nnrmthread.interrupt();
|
||||
}
|
||||
if (dir != null && dir.fsImage != null) {
|
||||
if (dir.fsImage.editLog != null) {
|
||||
dir.fsImage.editLog.close();
|
||||
|
@ -3178,10 +3183,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
* acceptable levels, this daemon will cause the NN to exit safe mode.
|
||||
*/
|
||||
class NameNodeResourceMonitor implements Runnable {
|
||||
boolean shouldNNRmRun = true;
|
||||
@Override
|
||||
public void run () {
|
||||
try {
|
||||
while (fsRunning) {
|
||||
while (fsRunning && shouldNNRmRun) {
|
||||
checkAvailableResources();
|
||||
if(!nameNodeHasResourcesAvailable()) {
|
||||
String lowResourcesMsg = "NameNode low on available disk space. ";
|
||||
|
@ -3202,7 +3208,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void stopMonitor() {
|
||||
shouldNNRmRun = false;
|
||||
}
|
||||
}
|
||||
|
||||
public FSImage getFSImage() {
|
||||
return dir.fsImage;
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode.ha;
|
||||
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -127,6 +129,7 @@ public class TestFailureOfSharedDir {
|
|||
@Test
|
||||
public void testFailureOfSharedDir() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
|
||||
|
||||
// The shared edits dir will automatically be marked required.
|
||||
MiniDFSCluster cluster = null;
|
||||
|
@ -151,6 +154,15 @@ public class TestFailureOfSharedDir {
|
|||
assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
|
||||
true));
|
||||
|
||||
Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
|
||||
DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
|
||||
|
||||
NameNode nn1 = cluster.getNameNode(1);
|
||||
assertTrue(nn1.isStandbyState());
|
||||
assertFalse(
|
||||
"StandBy NameNode should not go to SafeMode on resource unavailability",
|
||||
nn1.isInSafeMode());
|
||||
|
||||
NameNode nn0 = cluster.getNameNode(0);
|
||||
nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
|
||||
.setRuntimeForTesting(mockRuntime);
|
||||
|
|
Loading…
Reference in New Issue