diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 9c7b683f010..1f917159741 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -165,6 +165,8 @@ Release 2.0.1-alpha - UNRELEASED HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis jumps (Andy Isaacson via todd) + HDFS-2914. HA: Standby should not enter safemode when resources are low. (Vinay via atm) + BREAKDOWN OF HDFS-3042 SUBTASKS HDFS-2185. HDFS portion of ZK-based FailoverController (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 0c65c2b90ec..c0462004533 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -557,8 +557,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, !safeMode.isPopulatingReplQueues(); setBlockTotal(); blockManager.activate(conf); - this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); - nnrmthread.start(); } finally { writeUnlock(); } @@ -575,7 +573,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { if (blockManager != null) blockManager.close(); - if (nnrmthread != null) nnrmthread.interrupt(); } finally { writeUnlock(); } @@ -629,6 +626,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } leaseManager.startMonitor(); startSecretManagerIfNecessary(); + + //ResourceMonitor required only at ActiveNN. See HDFS-2914 + this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); + nnrmthread.start(); } finally { writeUnlock(); } @@ -651,6 +652,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (leaseManager != null) { leaseManager.stopMonitor(); } + if (nnrmthread != null) { + ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor(); + nnrmthread.interrupt(); + } if (dir != null && dir.fsImage != null) { if (dir.fsImage.editLog != null) { dir.fsImage.editLog.close(); @@ -3178,10 +3183,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * acceptable levels, this daemon will cause the NN to exit safe mode. */ class NameNodeResourceMonitor implements Runnable { + boolean shouldNNRmRun = true; @Override public void run () { try { - while (fsRunning) { + while (fsRunning && shouldNNRmRun) { checkAvailableResources(); if(!nameNodeHasResourcesAvailable()) { String lowResourcesMsg = "NameNode low on available disk space. "; @@ -3202,7 +3208,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e); } } - } + + public void stopMonitor() { + shouldNNRmRun = false; + } + } public FSImage getFSImage() { return dir.fsImage; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java index cc9552aec2a..a158a5ed6b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; import static org.junit.Assert.*; import java.io.File; @@ -127,6 +129,7 @@ public class TestFailureOfSharedDir { @Test public void testFailureOfSharedDir() throws Exception { Configuration conf = new Configuration(); + conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000); // The shared edits dir will automatically be marked required. MiniDFSCluster cluster = null; @@ -151,6 +154,15 @@ public class TestFailureOfSharedDir { assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", true)); + Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, + DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2); + + NameNode nn1 = cluster.getNameNode(1); + assertTrue(nn1.isStandbyState()); + assertFalse( + "StandBy NameNode should not go to SafeMode on resource unavailability", + nn1.isInSafeMode()); + NameNode nn0 = cluster.getNameNode(0); nn0.getNamesystem().getFSImage().getEditLog().getJournalSet() .setRuntimeForTesting(mockRuntime);