YARN-1607. TestRM relies on the scheduler assigning multiple containers in a single node update (Sandy Ryza)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1560533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2de391de9f
commit
8ceddeb0c3
|
@ -357,6 +357,9 @@ Release 2.4.0 - UNRELEASED
|
||||||
YARN-1606. Fix the default value of yarn.resourcemanager.zk-timeout-ms
|
YARN-1606. Fix the default value of yarn.resourcemanager.zk-timeout-ms
|
||||||
in yarn-default.xml (kasha)
|
in yarn-default.xml (kasha)
|
||||||
|
|
||||||
|
YARN-1607. TestRM relies on the scheduler assigning multiple containers in
|
||||||
|
a single node update (Sandy Ryza)
|
||||||
|
|
||||||
Release 2.3.0 - UNRELEASED
|
Release 2.3.0 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import junit.framework.Assert;
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
@ -38,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||||
import org.apache.hadoop.yarn.api.records.NMToken;
|
import org.apache.hadoop.yarn.api.records.NMToken;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.Token;
|
import org.apache.hadoop.yarn.api.records.Token;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
|
@ -46,6 +48,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
|
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
|
@ -56,6 +59,9 @@ public class TestRM {
|
||||||
|
|
||||||
private static final Log LOG = LogFactory.getLog(TestRM.class);
|
private static final Log LOG = LogFactory.getLog(TestRM.class);
|
||||||
|
|
||||||
|
// Milliseconds to sleep for when waiting for something to happen
|
||||||
|
private final static int WAIT_SLEEP_MS = 100;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetNewAppId() throws Exception {
|
public void testGetNewAppId() throws Exception {
|
||||||
Logger rootLogger = LogManager.getRootLogger();
|
Logger rootLogger = LogManager.getRootLogger();
|
||||||
|
@ -69,7 +75,7 @@ public class TestRM {
|
||||||
rm.stop();
|
rm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test (timeout = 30000)
|
||||||
public void testAppWithNoContainers() throws Exception {
|
public void testAppWithNoContainers() throws Exception {
|
||||||
Logger rootLogger = LogManager.getRootLogger();
|
Logger rootLogger = LogManager.getRootLogger();
|
||||||
rootLogger.setLevel(Level.DEBUG);
|
rootLogger.setLevel(Level.DEBUG);
|
||||||
|
@ -91,7 +97,7 @@ public class TestRM {
|
||||||
rm.stop();
|
rm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test (timeout = 30000)
|
||||||
public void testAppOnMultiNode() throws Exception {
|
public void testAppOnMultiNode() throws Exception {
|
||||||
Logger rootLogger = LogManager.getRootLogger();
|
Logger rootLogger = LogManager.getRootLogger();
|
||||||
rootLogger.setLevel(Level.DEBUG);
|
rootLogger.setLevel(Level.DEBUG);
|
||||||
|
@ -116,30 +122,30 @@ public class TestRM {
|
||||||
am.allocate("h1" , 1000, request, new ArrayList<ContainerId>());
|
am.allocate("h1" , 1000, request, new ArrayList<ContainerId>());
|
||||||
|
|
||||||
//kick the scheduler
|
//kick the scheduler
|
||||||
nm1.nodeHeartbeat(true);
|
|
||||||
List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
|
List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
|
||||||
new ArrayList<ContainerId>()).getAllocatedContainers();
|
new ArrayList<ContainerId>()).getAllocatedContainers();
|
||||||
int contReceived = conts.size();
|
int contReceived = conts.size();
|
||||||
while (contReceived < 3) {//only 3 containers are available on node1
|
while (contReceived < 3) {//only 3 containers are available on node1
|
||||||
|
nm1.nodeHeartbeat(true);
|
||||||
conts.addAll(am.allocate(new ArrayList<ResourceRequest>(),
|
conts.addAll(am.allocate(new ArrayList<ResourceRequest>(),
|
||||||
new ArrayList<ContainerId>()).getAllocatedContainers());
|
new ArrayList<ContainerId>()).getAllocatedContainers());
|
||||||
contReceived = conts.size();
|
contReceived = conts.size();
|
||||||
LOG.info("Got " + contReceived + " containers. Waiting to get " + 3);
|
LOG.info("Got " + contReceived + " containers. Waiting to get " + 3);
|
||||||
Thread.sleep(2000);
|
Thread.sleep(WAIT_SLEEP_MS);
|
||||||
}
|
}
|
||||||
Assert.assertEquals(3, conts.size());
|
Assert.assertEquals(3, conts.size());
|
||||||
|
|
||||||
//send node2 heartbeat
|
//send node2 heartbeat
|
||||||
nm2.nodeHeartbeat(true);
|
|
||||||
conts = am.allocate(new ArrayList<ResourceRequest>(),
|
conts = am.allocate(new ArrayList<ResourceRequest>(),
|
||||||
new ArrayList<ContainerId>()).getAllocatedContainers();
|
new ArrayList<ContainerId>()).getAllocatedContainers();
|
||||||
contReceived = conts.size();
|
contReceived = conts.size();
|
||||||
while (contReceived < 10) {
|
while (contReceived < 10) {
|
||||||
|
nm2.nodeHeartbeat(true);
|
||||||
conts.addAll(am.allocate(new ArrayList<ResourceRequest>(),
|
conts.addAll(am.allocate(new ArrayList<ResourceRequest>(),
|
||||||
new ArrayList<ContainerId>()).getAllocatedContainers());
|
new ArrayList<ContainerId>()).getAllocatedContainers());
|
||||||
contReceived = conts.size();
|
contReceived = conts.size();
|
||||||
LOG.info("Got " + contReceived + " containers. Waiting to get " + 10);
|
LOG.info("Got " + contReceived + " containers. Waiting to get " + 10);
|
||||||
Thread.sleep(2000);
|
Thread.sleep(WAIT_SLEEP_MS);
|
||||||
}
|
}
|
||||||
Assert.assertEquals(10, conts.size());
|
Assert.assertEquals(10, conts.size());
|
||||||
|
|
||||||
|
@ -150,7 +156,7 @@ public class TestRM {
|
||||||
rm.stop();
|
rm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test (timeout = 40000)
|
||||||
public void testNMToken() throws Exception {
|
public void testNMToken() throws Exception {
|
||||||
MockRM rm = new MockRM();
|
MockRM rm = new MockRM();
|
||||||
try {
|
try {
|
||||||
|
@ -187,19 +193,17 @@ public class TestRM {
|
||||||
// initially requesting 2 containers.
|
// initially requesting 2 containers.
|
||||||
AllocateResponse response =
|
AllocateResponse response =
|
||||||
am.allocate("h1", 1000, 2, releaseContainerList);
|
am.allocate("h1", 1000, 2, releaseContainerList);
|
||||||
nm1.nodeHeartbeat(true);
|
|
||||||
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
||||||
allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2,
|
allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2,
|
||||||
nmTokens);
|
nmTokens, nm1);
|
||||||
Assert.assertEquals(1, nmTokens.size());
|
Assert.assertEquals(1, nmTokens.size());
|
||||||
|
|
||||||
|
|
||||||
// requesting 2 more containers.
|
// requesting 2 more containers.
|
||||||
response = am.allocate("h1", 1000, 2, releaseContainerList);
|
response = am.allocate("h1", 1000, 2, releaseContainerList);
|
||||||
nm1.nodeHeartbeat(true);
|
|
||||||
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
||||||
allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4,
|
allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4,
|
||||||
nmTokens);
|
nmTokens, nm1);
|
||||||
Assert.assertEquals(1, nmTokens.size());
|
Assert.assertEquals(1, nmTokens.size());
|
||||||
|
|
||||||
|
|
||||||
|
@ -211,23 +215,27 @@ public class TestRM {
|
||||||
new ArrayList<Container>();
|
new ArrayList<Container>();
|
||||||
|
|
||||||
response = am.allocate("h2", 1000, 2, releaseContainerList);
|
response = am.allocate("h2", 1000, 2, releaseContainerList);
|
||||||
nm2.nodeHeartbeat(true);
|
|
||||||
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
||||||
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2,
|
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2,
|
||||||
nmTokens);
|
nmTokens, nm2);
|
||||||
Assert.assertEquals(2, nmTokens.size());
|
Assert.assertEquals(2, nmTokens.size());
|
||||||
|
|
||||||
// Simulating NM-2 restart.
|
// Simulating NM-2 restart.
|
||||||
nm2 = rm.registerNode("h2:1234", 10000);
|
nm2 = rm.registerNode("h2:1234", 10000);
|
||||||
nm2.nodeHeartbeat(true);
|
// Wait for reconnect to make it through the RM and create a new RMNode
|
||||||
|
Map<NodeId, RMNode> nodes = rm.getRMContext().getRMNodes();
|
||||||
|
while (nodes.get(nm2.getNodeId()).getLastNodeHeartBeatResponse()
|
||||||
|
.getResponseId() > 0) {
|
||||||
|
Thread.sleep(WAIT_SLEEP_MS);
|
||||||
|
}
|
||||||
|
|
||||||
int interval = 40;
|
int interval = 40;
|
||||||
// Wait for nm Token to be cleared.
|
// Wait for nm Token to be cleared.
|
||||||
while (nmTokenSecretManager
|
while (nmTokenSecretManager
|
||||||
.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
|
.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
|
||||||
nm2.getNodeId()) && interval-- > 0) {
|
nm2.getNodeId()) && interval-- > 0) {
|
||||||
LOG.info("waiting for nmToken to be cleared for : " + nm2.getNodeId());
|
LOG.info("waiting for nmToken to be cleared for : " + nm2.getNodeId());
|
||||||
Thread.sleep(1000);
|
Thread.sleep(WAIT_SLEEP_MS);
|
||||||
}
|
}
|
||||||
Assert.assertTrue(nmTokenSecretManager
|
Assert.assertTrue(nmTokenSecretManager
|
||||||
.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
|
.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
|
||||||
|
@ -238,10 +246,9 @@ public class TestRM {
|
||||||
|
|
||||||
// We should again receive the NMToken.
|
// We should again receive the NMToken.
|
||||||
response = am.allocate("h2", 1000, 2, releaseContainerList);
|
response = am.allocate("h2", 1000, 2, releaseContainerList);
|
||||||
nm2.nodeHeartbeat(true);
|
|
||||||
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
||||||
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4,
|
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4,
|
||||||
nmTokens);
|
nmTokens, nm2);
|
||||||
Assert.assertEquals(2, nmTokens.size());
|
Assert.assertEquals(2, nmTokens.size());
|
||||||
|
|
||||||
// Now rolling over NMToken masterKey. it should resend the NMToken in
|
// Now rolling over NMToken masterKey. it should resend the NMToken in
|
||||||
|
@ -270,10 +277,9 @@ public class TestRM {
|
||||||
Assert.assertEquals(0, nmTokens.size());
|
Assert.assertEquals(0, nmTokens.size());
|
||||||
// We should again receive the NMToken.
|
// We should again receive the NMToken.
|
||||||
response = am.allocate("h2", 1000, 1, releaseContainerList);
|
response = am.allocate("h2", 1000, 1, releaseContainerList);
|
||||||
nm2.nodeHeartbeat(true);
|
|
||||||
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
Assert.assertEquals(0, response.getAllocatedContainers().size());
|
||||||
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5,
|
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5,
|
||||||
nmTokens);
|
nmTokens, nm2);
|
||||||
Assert.assertEquals(1, nmTokens.size());
|
Assert.assertEquals(1, nmTokens.size());
|
||||||
Assert.assertTrue(nmTokenSecretManager
|
Assert.assertTrue(nmTokenSecretManager
|
||||||
.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
|
.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
|
||||||
|
@ -305,12 +311,14 @@ public class TestRM {
|
||||||
|
|
||||||
protected void allocateContainersAndValidateNMTokens(MockAM am,
|
protected void allocateContainersAndValidateNMTokens(MockAM am,
|
||||||
ArrayList<Container> containersReceived, int totalContainerRequested,
|
ArrayList<Container> containersReceived, int totalContainerRequested,
|
||||||
HashMap<String, Token> nmTokens) throws Exception, InterruptedException {
|
HashMap<String, Token> nmTokens, MockNM nm) throws Exception,
|
||||||
|
InterruptedException {
|
||||||
ArrayList<ContainerId> releaseContainerList = new ArrayList<ContainerId>();
|
ArrayList<ContainerId> releaseContainerList = new ArrayList<ContainerId>();
|
||||||
AllocateResponse response;
|
AllocateResponse response;
|
||||||
ArrayList<ResourceRequest> resourceRequest =
|
ArrayList<ResourceRequest> resourceRequest =
|
||||||
new ArrayList<ResourceRequest>();
|
new ArrayList<ResourceRequest>();
|
||||||
while (containersReceived.size() < totalContainerRequested) {
|
while (containersReceived.size() < totalContainerRequested) {
|
||||||
|
nm.nodeHeartbeat(true);
|
||||||
LOG.info("requesting containers..");
|
LOG.info("requesting containers..");
|
||||||
response =
|
response =
|
||||||
am.allocate(resourceRequest, releaseContainerList);
|
am.allocate(resourceRequest, releaseContainerList);
|
||||||
|
@ -326,7 +334,7 @@ public class TestRM {
|
||||||
}
|
}
|
||||||
LOG.info("Got " + containersReceived.size()
|
LOG.info("Got " + containersReceived.size()
|
||||||
+ " containers. Waiting to get " + totalContainerRequested);
|
+ " containers. Waiting to get " + totalContainerRequested);
|
||||||
Thread.sleep(500);
|
Thread.sleep(WAIT_SLEEP_MS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue