From 57c9a8509f4064d06a02aa92ab26f7a39d98826d Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 14 Dec 2021 13:51:51 +0530 Subject: [PATCH] HDFS-16373. Fix MiniDFSCluster restart in case of multiple namenodes. (#3756) Reviewed-by: Viraj Jasani Reviewed-by: litao Signed-off-by: Takanobu Asanuma --- .../apache/hadoop/hdfs/MiniDFSCluster.java | 43 ++++++++++--------- .../hadoop/hdfs/TestMiniDFSCluster.java | 8 ++++ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index ff38236987b..6d2dabf3c9f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -2249,9 +2249,11 @@ public class MiniDFSCluster implements AutoCloseable { info.nameNode = nn; info.setStartOpt(startOpt); if (waitActive) { - waitClusterUp(); + if (numDataNodes > 0) { + waitNameNodeUp(nnIndex); + } LOG.info("Restarted the namenode"); - waitActive(); + waitActive(nnIndex); } } @@ -2761,11 +2763,25 @@ public class MiniDFSCluster implements AutoCloseable { DFSClient client = new DFSClient(addr, conf); // ensure all datanodes have registered and sent heartbeat to the namenode - while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) { + int failedCount = 0; + while (true) { try { - LOG.info("Waiting for cluster to become active"); - Thread.sleep(100); + while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) { + LOG.info("Waiting for cluster to become active"); + Thread.sleep(100); + } + break; + } catch (IOException e) { + failedCount++; + // Cached RPC connection to namenode, if any, is expected to fail once + if (failedCount > 1) { + LOG.warn("Tried waitActive() " + failedCount + + " time(s) and failed, giving up. " + StringUtils + .stringifyException(e)); + throw e; + } } catch (InterruptedException e) { + throw new IOException(e); } } @@ -2801,22 +2817,7 @@ public class MiniDFSCluster implements AutoCloseable { */ public void waitActive() throws IOException { for (int index = 0; index < namenodes.size(); index++) { - int failedCount = 0; - while (true) { - try { - waitActive(index); - break; - } catch (IOException e) { - failedCount++; - // Cached RPC connection to namenode, if any, is expected to fail once - if (failedCount > 1) { - LOG.warn("Tried waitActive() " + failedCount - + " time(s) and failed, giving up. " - + StringUtils.stringifyException(e)); - throw e; - } - } - } + waitActive(index); } LOG.info("Cluster is active"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java index 74a8e44bf7b..6b428c5f58e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java @@ -309,6 +309,14 @@ public class TestMiniDFSCluster { DFSUtil.addKeySuffixes( DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn1"))); } + + // Shutdown namenodes individually. + cluster.shutdownNameNode(0); + cluster.shutdownNameNode(1); + + // Restart namenodes individually with wait active, both should be successful. + cluster.restartNameNode(0); + cluster.restartNameNode(1); } } }