From d01e9c125b3f8b8803c73a781828b9b8b4d36043 Mon Sep 17 00:00:00 2001 From: Eric Badger Date: Tue, 17 Mar 2020 22:15:38 +0000 Subject: [PATCH] Revert "YARN-2710. RM HA tests failed intermittently on trunk. Contributed by Ahmed" This reverts commit 85eac8b0dfec30c2ecc7310cc65f6023097b2c26. --- .../yarn/client/ProtocolHATestBase.java | 45 ++++++++----------- .../TestApplicationClientProtocolOnHA.java | 39 +++++++--------- ...ionMasterServiceProtocolForTimelineV2.java | 5 +-- ...tApplicationMasterServiceProtocolOnHA.java | 11 ++--- .../yarn/client/TestResourceTrackerOnHA.java | 33 +++++--------- 5 files changed, 50 insertions(+), 83 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java index 9bfd606cfdf..5a947e4b04a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java @@ -18,12 +18,9 @@ package org.apache.hadoop.yarn.client; -import com.google.common.base.Supplier; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Map; -import java.util.concurrent.TimeoutException; -import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; @@ -172,7 +169,7 @@ public abstract class ProtocolHATestBase extends ClientBaseWithFixes { keepRunning = true; conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); - conf.setInt(YarnConfiguration.CLIENT_FAILOVER_MAX_ATTEMPTS, 10); + conf.setInt(YarnConfiguration.CLIENT_FAILOVER_MAX_ATTEMPTS, 5); conf.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM2_NODE_ID); HATestUtil.setRpcAddressForRM(RM1_NODE_ID, RM1_PORT_BASE, conf); HATestUtil.setRpcAddressForRM(RM2_NODE_ID, RM2_PORT_BASE, conf); @@ -226,28 +223,22 @@ public abstract class ProtocolHATestBase extends ClientBaseWithFixes { verifyClientConnection(); } - protected void verifyClientConnection() throws InterruptedException { - try { - GenericTestUtils.waitFor(new Supplier() { - @Override - public Boolean get() { - Configuration yarnConf = new YarnConfiguration(conf); - YarnClient client = ProtocolHATestBase.this - .createAndStartYarnClient(yarnConf); - try { - client.getApplications(); - return true; - } catch (YarnException | IOException ex) { - LOG.error(ex.getMessage()); - } finally { - client.stop(); - } - return false; - } - }, 50, 500); - } catch (TimeoutException e) { - fail("Client couldn't connect to the Active RM"); + protected void verifyClientConnection() { + int numRetries = 3; + while(numRetries-- > 0) { + Configuration conf = new YarnConfiguration(this.conf); + YarnClient client = createAndStartYarnClient(conf); + try { + Thread.sleep(100); + client.getApplications(); + return; + } catch (Exception e) { + LOG.error(e.getMessage()); + } finally { + client.stop(); + } } + fail("Client couldn't connect to the Active RM"); } protected Thread createAndStartFailoverThread() { @@ -337,11 +328,11 @@ public abstract class ProtocolHATestBase extends ClientBaseWithFixes { } private boolean waittingForFailOver() { - int maximumWaittingTime = 200; + int maximumWaittingTime = 50; int count = 0; while (!failoverTriggered.get() && count <= maximumWaittingTime) { try { - Thread.sleep(25); + Thread.sleep(100); } catch (InterruptedException e) { // DO NOTHING } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java index a7d8dea4e99..c9fa91513af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java @@ -44,9 +44,7 @@ import org.apache.hadoop.yarn.util.Records; import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { private YarnClient client = null; @@ -65,10 +63,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { } } - @Rule - public Timeout timeout = new Timeout(180000); - - @Test + @Test(timeout = 15000) public void testGetApplicationReportOnHA() throws Exception { ApplicationReport report = client.getApplicationReport(cluster.createFakeAppId()); @@ -76,7 +71,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { Assert.assertEquals(cluster.createFakeAppReport(), report); } - @Test + @Test(timeout = 15000) public void testGetNewApplicationOnHA() throws Exception { ApplicationId appId = client.createApplication().getApplicationSubmissionContext() @@ -85,7 +80,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { Assert.assertEquals(cluster.createFakeAppId(), appId); } - @Test + @Test(timeout = 15000) public void testGetClusterMetricsOnHA() throws Exception { YarnClusterMetrics clusterMetrics = client.getYarnClusterMetrics(); @@ -94,7 +89,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { clusterMetrics); } - @Test + @Test(timeout = 15000) public void testGetApplicationsOnHA() throws Exception { List reports = client.getApplications(); @@ -104,7 +99,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { reports); } - @Test + @Test(timeout = 15000) public void testGetClusterNodesOnHA() throws Exception { List reports = client.getNodeReports(NodeState.RUNNING); Assert.assertTrue(reports != null); @@ -113,7 +108,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { reports); } - @Test + @Test(timeout = 15000) public void testGetQueueInfoOnHA() throws Exception { QueueInfo queueInfo = client.getQueueInfo("root"); Assert.assertTrue(queueInfo != null); @@ -121,7 +116,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { queueInfo); } - @Test + @Test(timeout = 15000) public void testGetQueueUserAclsOnHA() throws Exception { List queueUserAclsList = client.getQueueAclsInfo(); Assert.assertTrue(queueUserAclsList != null); @@ -130,7 +125,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { queueUserAclsList); } - @Test + @Test(timeout = 15000) public void testGetApplicationAttemptReportOnHA() throws Exception { ApplicationAttemptReport report = client.getApplicationAttemptReport(cluster @@ -139,7 +134,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { Assert.assertEquals(cluster.createFakeApplicationAttemptReport(), report); } - @Test + @Test(timeout = 15000) public void testGetApplicationAttemptsOnHA() throws Exception { List reports = client.getApplicationAttempts(cluster.createFakeAppId()); @@ -149,7 +144,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { reports); } - @Test + @Test(timeout = 15000) public void testGetContainerReportOnHA() throws Exception { ContainerReport report = client.getContainerReport(cluster.createFakeContainerId()); @@ -157,7 +152,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { Assert.assertEquals(cluster.createFakeContainerReport(), report); } - @Test + @Test(timeout = 15000) public void testGetContainersOnHA() throws Exception { List reports = client.getContainers(cluster.createFakeApplicationAttemptId()); @@ -167,7 +162,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { reports); } - @Test + @Test(timeout = 15000) public void testSubmitApplicationOnHA() throws Exception { ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); @@ -184,23 +179,23 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { .containsKey(appId)); } - @Test + @Test(timeout = 15000) public void testMoveApplicationAcrossQueuesOnHA() throws Exception{ client.moveApplicationAcrossQueues(cluster.createFakeAppId(), "root"); } - @Test + @Test(timeout = 15000) public void testForceKillApplicationOnHA() throws Exception { client.killApplication(cluster.createFakeAppId()); } - @Test + @Test(timeout = 15000) public void testGetDelegationTokenOnHA() throws Exception { Token token = client.getRMDelegationToken(new Text(" ")); Assert.assertEquals(token, cluster.createFakeToken()); } - @Test + @Test(timeout = 15000) public void testRenewDelegationTokenOnHA() throws Exception { RenewDelegationTokenRequest request = RenewDelegationTokenRequest.newInstance(cluster.createFakeToken()); @@ -210,7 +205,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { Assert.assertEquals(newExpirationTime, cluster.createNextExpirationTime()); } - @Test + @Test(timeout = 15000) public void testCancelDelegationTokenOnHA() throws Exception { CancelDelegationTokenRequest request = CancelDelegationTokenRequest.newInstance(cluster.createFakeToken()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolForTimelineV2.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolForTimelineV2.java index 109f15f4e80..be8c3023d08 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolForTimelineV2.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolForTimelineV2.java @@ -34,7 +34,6 @@ import org.apache.hadoop.yarn.server.timelineservice.storage.TimelineWriter; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.junit.rules.Timeout; /** * Tests Application Master Protocol with timeline service v2 enabled. @@ -42,8 +41,6 @@ import org.junit.rules.Timeout; public class TestApplicationMasterServiceProtocolForTimelineV2 extends ApplicationMasterServiceProtoTestBase { - public Timeout timeout = new Timeout(180000); - @Before public void initialize() throws Exception { HATestUtil.setRpcAddressForRM(RM1_NODE_ID, RM1_PORT_BASE + 200, conf); @@ -56,7 +53,7 @@ public class TestApplicationMasterServiceProtocolForTimelineV2 super.startupHAAndSetupClient(); } - @Test + @Test(timeout = 15000) public void testAllocateForTimelineV2OnHA() throws YarnException, IOException { AllocateRequest request = AllocateRequest.newInstance(0, 50f, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolOnHA.java index 0cab4692338..c2f39a1d4ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceProtocolOnHA.java @@ -35,23 +35,18 @@ import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.exceptions.YarnException; import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; public class TestApplicationMasterServiceProtocolOnHA extends ApplicationMasterServiceProtoTestBase { - @Rule - public Timeout timeout = new Timeout(180000); - @Before public void initialize() throws Exception { startHACluster(0, false, false, true); super.startupHAAndSetupClient(); } - @Test + @Test(timeout = 15000) public void testRegisterApplicationMasterOnHA() throws YarnException, IOException { RegisterApplicationMasterRequest request = @@ -62,7 +57,7 @@ public class TestApplicationMasterServiceProtocolOnHA this.cluster.createFakeRegisterApplicationMasterResponse()); } - @Test + @Test(timeout = 15000) public void testFinishApplicationMasterOnHA() throws YarnException, IOException { FinishApplicationMasterRequest request = @@ -74,7 +69,7 @@ public class TestApplicationMasterServiceProtocolOnHA this.cluster.createFakeFinishApplicationMasterResponse()); } - @Test + @Test(timeout = 15000) public void testAllocateOnHA() throws YarnException, IOException { AllocateRequest request = AllocateRequest.newInstance(0, 50f, new ArrayList(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java index 98ce9e09b18..338198bce61 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java @@ -18,10 +18,8 @@ package org.apache.hadoop.yarn.client; -import com.google.common.base.Supplier; import java.io.IOException; -import org.apache.hadoop.test.GenericTestUtils; import org.junit.Assert; import org.apache.hadoop.ipc.RPC; @@ -35,17 +33,12 @@ import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.util.YarnVersionInfo; import org.junit.After; import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; -public class TestResourceTrackerOnHA extends ProtocolHATestBase { +public class TestResourceTrackerOnHA extends ProtocolHATestBase{ private ResourceTracker resourceTracker = null; - @Rule - public Timeout timeout = new Timeout(180000); - @Before public void initiate() throws Exception { startHACluster(0, false, true, false); @@ -59,7 +52,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase { } } - @Test + @Test(timeout = 15000) public void testResourceTrackerOnHA() throws Exception { NodeId nodeId = NodeId.newInstance("localhost", 0); Resource resource = Resource.newInstance(2048, 4); @@ -69,7 +62,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase { RegisterNodeManagerRequest.newInstance(nodeId, 0, resource, YarnVersionInfo.getVersion(), null, null); resourceTracker.registerNodeManager(request); - Assert.assertTrue(waitForNodeManagerToConnect(200, nodeId)); + Assert.assertTrue(waitForNodeManagerToConnect(10000, nodeId)); // restart the failover thread, and make sure nodeHeartbeat works failoverThread = createAndStartFailoverThread(); @@ -85,18 +78,14 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase { return ServerRMProxy.createRMProxy(this.conf, ResourceTracker.class); } - private boolean waitForNodeManagerToConnect(final int maxTime, - final NodeId nodeId) + private boolean waitForNodeManagerToConnect(int timeout, NodeId nodeId) throws Exception { - GenericTestUtils.waitFor( - new Supplier() { - @Override - public Boolean get() { - return TestResourceTrackerOnHA.this.getActiveRM().getRMContext() - .getRMNodes().containsKey(nodeId); - } - }, 20, - maxTime); - return true; + for (int i = 0; i < timeout / 100; i++) { + if (getActiveRM().getRMContext().getRMNodes().containsKey(nodeId)) { + return true; + } + Thread.sleep(100); + } + return false; } }