HDFS-15323. StandbyNode fails transition to active due to insufficient transaction tailing. Contributed by Konstantin V Shvachko.

(cherry picked from commit ebb878bab9)
This commit is contained in:
Konstantin V Shvachko 2020-05-04 10:29:50 -07:00
parent b23a585cb1
commit e95a4ddbb1
3 changed files with 47 additions and 9 deletions

View File

@ -73,9 +73,9 @@ public class QuorumJournalManager implements JournalManager {
static final Logger LOG = LoggerFactory.getLogger(QuorumJournalManager.class);
// This config is not publicly exposed
static final String QJM_RPC_MAX_TXNS_KEY =
public static final String QJM_RPC_MAX_TXNS_KEY =
"dfs.ha.tail-edits.qjm.rpc.max-txns";
static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000;
public static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000;
// Maximum number of transactions to fetch at a time when using the
// RPC edit fetch mechanism

View File

@ -298,13 +298,23 @@ public class EditLogTailer {
SecurityUtil.doAsLoginUser(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
long editsTailed = 0;
// Fully tail the journal to the end
do {
long startTime = Time.monotonicNow();
try {
// It is already under the full name system lock and the checkpointer
// thread is already stopped. No need to acqure any other lock.
doTailEdits();
NameNode.getNameNodeMetrics().addEditLogTailInterval(
startTime - lastLoadTimeMs);
// It is already under the name system lock and the checkpointer
// thread is already stopped. No need to acquire any other lock.
editsTailed = doTailEdits();
} catch (InterruptedException e) {
throw new IOException(e);
} finally {
NameNode.getNameNodeMetrics().addEditLogTailTime(
Time.monotonicNow() - startTime);
}
} while(editsTailed > 0);
return null;
}
});

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.test.GenericTestUtils;
import static org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter.getFileInfo;
import static org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.QJM_RPC_MAX_TXNS_KEY;
import org.junit.After;
import org.junit.Before;
@ -72,6 +73,8 @@ public class TestStandbyInProgressTail {
conf.setBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY, true);
conf.setInt(DFSConfigKeys.DFS_QJOURNAL_SELECT_INPUT_STREAMS_TIMEOUT_KEY,
500);
// Set a very small limit on the number of transactions per journal RPC call
conf.setInt(QJM_RPC_MAX_TXNS_KEY, 3);
HAUtil.setAllowStandbyReads(conf, true);
qjmhaCluster = new MiniQJMHACluster.Builder(conf).build();
cluster = qjmhaCluster.getDfsCluster();
@ -300,6 +303,31 @@ public class TestStandbyInProgressTail {
waitForFileInfo(nn1, "/test", "/test2", "/test3");
}
/**
 * Verifies that the Standby NameNode catches up across several edit log
 * segments while it is being transitioned to Active.
 */
@Test
public void testUndertailingWhileFailover() throws Exception {
  final String dirPrefix = "/testFailoverWhileTailingWithoutCache/";
  final int segmentCount = 3;
  final int dirsPerSegment = 5;

  cluster.transitionToActive(0);
  cluster.waitActive(0);

  // Create three batches of five directories (0-4, 5-9, 10-14) through nn0,
  // rolling the edit log after every batch (creating segments 1, 2 and 3).
  for (int segment = 0; segment < segmentCount; segment++) {
    int first = segment * dirsPerSegment;
    mkdirs(nn0,
        dirPrefix + first,
        dirPrefix + (first + 1),
        dirPrefix + (first + 2),
        dirPrefix + (first + 3),
        dirPrefix + (first + 4));
    nn0.getRpcServer().rollEditLog();
  }

  // Fail over from NN 0 to NN 1.
  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  cluster.waitActive(1);

  // Check nn1 for the first two directories and the very last one.
  waitForFileInfo(nn1, dirPrefix + 0, dirPrefix + 1, dirPrefix + 14);
}
@Test
public void testNonUniformConfig() throws Exception {
// Test case where some NNs (in this case the active NN) in the cluster