From 8f971b0e5413b491a2c7043bd25b046777e07395 Mon Sep 17 00:00:00 2001 From: litao Date: Tue, 22 Nov 2022 02:14:07 +0800 Subject: [PATCH] HDFS-16547. [SBN read] Namenode in safe mode should not be transfer to observer state (#4201) Signed-off-by: Erik Krogen Reviewed-by: Zengqiang Xu --- .../hadoop/hdfs/server/namenode/NameNode.java | 3 +++ .../apache/hadoop/hdfs/tools/DFSHAAdmin.java | 11 +++++--- .../src/main/resources/hdfs-default.xml | 2 +- .../markdown/HDFSHighAvailabilityWithNFS.md | 6 +++-- .../markdown/HDFSHighAvailabilityWithQJM.md | 6 +++-- .../server/namenode/ha/TestHASafeMode.java | 22 ++++++++++++++++ .../hdfs/tools/TestDFSHAAdminMiniCluster.java | 26 ++++++++++++++++++- 7 files changed, 67 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 32ff45b2bfd..36ea5c2f646 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -2009,6 +2009,9 @@ public class NameNode extends ReconfigurableBase implements synchronized void transitionToObserver() throws IOException { String operationName = "transitionToObserver"; namesystem.checkSuperuserPrivilege(operationName); + if (notBecomeActiveInSafemode && isInSafeMode()) { + throw new ServiceFailedException(getRole() + " still not leave safemode"); + } if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java index 4d0d56c05a7..a462cafb458 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java @@ -247,7 +247,7 @@ public class DFSHAAdmin extends HAAdmin { } private int transitionToObserver(final CommandLine cmd) - throws IOException, ServiceFailedException { + throws IOException { String[] argv = cmd.getArgs(); if (argv.length != 1) { errOut.println("transitionToObserver: incorrect number of arguments"); @@ -262,8 +262,13 @@ public class DFSHAAdmin extends HAAdmin { if (!checkManualStateManagementOK(target)) { return -1; } - HAServiceProtocol proto = target.getProxy(getConf(), 0); - HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo()); + try { + HAServiceProtocol proto = target.getProxy(getConf(), 0); + HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo()); + } catch (ServiceFailedException e) { + errOut.println("transitionToObserver failed! " + e.getLocalizedMessage()); + return -1; + } return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 7b65f1d9c27..e4cb5b9ffe1 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -3725,7 +3725,7 @@ dfs.ha.nn.not-become-active-in-safemode false - This will prevent safe mode namenodes to become active while other standby + This will prevent safe mode namenodes to become active or observer while other standby namenodes might be ready to serve requests when it is set to true. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md index df1ab68afca..aec1534b03b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md @@ -316,12 +316,14 @@ The order in which you set these configurations is unimportant, but the values y hdfs://mycluster -* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active +* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active or observer Whether allow namenode to become active when it is in safemode, when it is set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if auto failover is on, or will throw exception to fail the transition to - active if auto failover is off. For example: + active if auto failover is off. If you transition namenode to observer state + when it is in safemode, when this configuration is set to true, namenode will throw exception + to fail the transition to observer. For example: dfs.ha.nn.not-become-active-in-safemode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md index 6bdd4e1c529..5591f4f2245 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md @@ -376,12 +376,14 @@ The order in which you set these configurations is unimportant, but the values y /path/to/journal/node/local/data -* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active +* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active or observer Whether allow namenode to become active when it is in safemode, when it is set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if auto failover is on, or will throw exception to fail the transition to - active if auto failover is off. For example: + active if auto failover is off. If you transition namenode to observer state + when it is in safemode, when this configuration is set to true, namenode will throw exception + to fail the transition to observer. For example: dfs.ha.nn.not-become-active-in-safemode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 16a57c68672..4766c4cecc9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -977,4 +977,26 @@ public class TestHASafeMode { () -> miniCluster.transitionToActive(0)); } } + + @Test + public void testTransitionToObserverWhenSafeMode() throws Exception { + Configuration config = new Configuration(); + config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true); + try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config, + new File(GenericTestUtils.getRandomizedTempPath())) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build()) { + miniCluster.waitActive(); + miniCluster.transitionToStandby(0); + miniCluster.transitionToStandby(1); + NameNode namenode0 = miniCluster.getNameNode(0); + NameNode namenode1 = miniCluster.getNameNode(1); + NameNodeAdapter.enterSafeMode(namenode0, false); + NameNodeAdapter.enterSafeMode(namenode1, false); + LambdaTestUtils.intercept(ServiceFailedException.class, + "NameNode still not leave safemode", + () -> miniCluster.transitionToObserver(0)); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java index aa048f865c2..d0edd175ec1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hdfs.tools; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; @@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster { @Before public void setup() throws IOException { conf = new Configuration(); + conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0) .build(); @@ -161,7 +164,28 @@ public class TestDFSHAAdminMiniCluster { assertEquals(-1, runTool("-transitionToActive", "nn1")); assertFalse(nnode1.isActiveState()); } - + + /** + * Tests that a Namenode in safe mode should not be transfer to observer. + */ + @Test + public void testObserverTransitionInSafeMode() throws Exception { + NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false); + DFSHAAdmin admin = new DFSHAAdmin(); + admin.setConf(conf); + System.setIn(new ByteArrayInputStream("yes\n".getBytes())); + int result = admin.run( + new String[]{"-transitionToObserver", "-forcemanual", "nn1"}); + assertEquals("State transition returned: " + result, -1, result); + + NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0)); + System.setIn(new ByteArrayInputStream("yes\n".getBytes())); + int result1 = admin.run( + new String[]{"-transitionToObserver", "-forcemanual", "nn1"}); + assertEquals("State transition returned: " + result1, 0, result1); + assertFalse(cluster.getNameNode(0).isInSafeMode()); + } + @Test public void testTryFailoverToSafeMode() throws Exception { conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,