From 8358d7c175950392f30850ef27e9dfbe908dedd3 Mon Sep 17 00:00:00 2001 From: Konstantin V Shvachko Date: Mon, 4 May 2020 10:29:50 -0700 Subject: [PATCH] HDFS-15323. StandbyNode fails transition to active due to insufficient transaction tailing. Contributed by Konstantin V Shvachko. (cherry picked from commit ebb878bab991c242b5089a18881aa10abf318ea0) --- .../qjournal/client/QuorumJournalManager.java | 4 +-- .../server/namenode/ha/EditLogTailer.java | 24 +++++++++++----- .../ha/TestStandbyInProgressTail.java | 28 +++++++++++++++++++ 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java index 94b5832f42c..f8ebd89e3e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java @@ -73,9 +73,9 @@ public class QuorumJournalManager implements JournalManager { static final Log LOG = LogFactory.getLog(QuorumJournalManager.class); // This config is not publicly exposed - static final String QJM_RPC_MAX_TXNS_KEY = + public static final String QJM_RPC_MAX_TXNS_KEY = "dfs.ha.tail-edits.qjm.rpc.max-txns"; - static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000; + public static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000; // Maximum number of transactions to fetch at a time when using the // RPC edit fetch mechanism diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index ca231b48c15..276b76fed17 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -299,13 +299,23 @@ public class EditLogTailer { SecurityUtil.doAsLoginUser(new PrivilegedExceptionAction() { @Override public Void run() throws Exception { - try { - // It is already under the full name system lock and the checkpointer - // thread is already stopped. No need to acqure any other lock. - doTailEdits(); - } catch (InterruptedException e) { - throw new IOException(e); - } + long editsTailed = 0; + // Fully tail the journal to the end + do { + long startTime = Time.monotonicNow(); + try { + NameNode.getNameNodeMetrics().addEditLogTailInterval( + startTime - lastLoadTimeMs); + // It is already under the name system lock and the checkpointer + // thread is already stopped. No need to acquire any other lock. + editsTailed = doTailEdits(); + } catch (InterruptedException e) { + throw new IOException(e); + } finally { + NameNode.getNameNodeMetrics().addEditLogTailTime( + Time.monotonicNow() - startTime); + } + } while(editsTailed > 0); return null; } }); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java index 0420579cb97..76929661d03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.test.GenericTestUtils; import static 
org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter.getFileInfo; +import static org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.QJM_RPC_MAX_TXNS_KEY; import org.junit.After; import org.junit.Before; @@ -72,6 +73,8 @@ public class TestStandbyInProgressTail { conf.setBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY, true); conf.setInt(DFSConfigKeys.DFS_QJOURNAL_SELECT_INPUT_STREAMS_TIMEOUT_KEY, 500); + // Set a very small limit on transactions per journal RPC call + conf.setInt(QJM_RPC_MAX_TXNS_KEY, 3); HAUtil.setAllowStandbyReads(conf, true); qjmhaCluster = new MiniQJMHACluster.Builder(conf).build(); cluster = qjmhaCluster.getDfsCluster(); @@ -300,6 +303,31 @@ public class TestStandbyInProgressTail { waitForFileInfo(nn1, "/test", "/test2", "/test3"); } + /** + * Test that Standby Node tails multiple segments while catching up + * during the transition to Active. + */ + @Test + public void testUndertailingWhileFailover() throws Exception { + cluster.transitionToActive(0); + cluster.waitActive(0); + + String p = "/testFailoverWhileTailingWithoutCache/"; + mkdirs(nn0, p + 0, p + 1, p + 2, p + 3, p + 4); + nn0.getRpcServer().rollEditLog(); // create segment 1 + + mkdirs(nn0, p + 5, p + 6, p + 7, p + 8, p + 9); + nn0.getRpcServer().rollEditLog(); // create segment 2 + + mkdirs(nn0, p + 10, p + 11, p + 12, p + 13, p + 14); + nn0.getRpcServer().rollEditLog(); // create segment 3 + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + cluster.waitActive(1); + waitForFileInfo(nn1, p + 0, p + 1, p + 14); + } + @Test public void testNonUniformConfig() throws Exception { // Test case where some NNs (in this case the active NN) in the cluster