YARN-9133. Make tests more easy to comprehend in TestGpuResourceHandler. Contributed by Peter Bacsko

This commit is contained in:
Szilard Nemeth 2019-08-14 17:13:25 +02:00
parent 2432356570
commit 3e0410449f
1 changed files with 214 additions and 158 deletions

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu; package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
@ -28,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
@ -43,9 +46,10 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreServic
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.util.resource.CustomResourceTypesConfigurationProvider; import org.apache.hadoop.yarn.util.resource.CustomResourceTypesConfigurationProvider;
import org.junit.After; import org.junit.After;
import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Rule;
import org.junit.Test; import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
@ -53,11 +57,13 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.anyString;
@ -65,7 +71,6 @@ import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never; import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@ -103,11 +108,14 @@ public class TestGpuResourceHandlerImpl {
} }
private File setupFakeGpuDiscoveryBinary() throws IOException { private File setupFakeGpuDiscoveryBinary() throws IOException {
File fakeBinary = new File(getTestParentDirectory() + "/fake-nvidia-smi"); File fakeBinary = new File(getTestParentDirectory() + "/nvidia-smi");
touchFile(fakeBinary); touchFile(fakeBinary);
return fakeBinary; return fakeBinary;
} }
@Rule
public ExpectedException expected = ExpectedException.none();
@Before @Before
public void setup() throws IOException { public void setup() throws IOException {
createTestDataDirectory(); createTestDataDirectory();
@ -120,16 +128,20 @@ public class TestGpuResourceHandlerImpl {
mockNMStateStore = mock(NMStateStoreService.class); mockNMStateStore = mock(NMStateStoreService.class);
Configuration conf = new Configuration(); Configuration conf = new Configuration();
Context nmContext = createMockNmContext(conf);
gpuDiscoverer = new GpuDiscoverer();
gpuResourceHandler = new GpuResourceHandlerImpl(nmContext,
mockCGroupsHandler, mockPrivilegedExecutor, gpuDiscoverer);
}
private Context createMockNmContext(Configuration conf) {
Context nmctx = mock(Context.class); Context nmctx = mock(Context.class);
when(nmctx.getNMStateStore()).thenReturn(mockNMStateStore); when(nmctx.getNMStateStore()).thenReturn(mockNMStateStore);
when(nmctx.getConf()).thenReturn(conf); when(nmctx.getConf()).thenReturn(conf);
runningContainersMap = new ConcurrentHashMap<>(); runningContainersMap = new ConcurrentHashMap<>();
when(nmctx.getContainers()).thenReturn(runningContainersMap); when(nmctx.getContainers()).thenReturn(runningContainersMap);
return nmctx;
gpuDiscoverer = new GpuDiscoverer();
gpuResourceHandler = new GpuResourceHandlerImpl(nmctx, mockCGroupsHandler,
mockPrivilegedExecutor, gpuDiscoverer);
} }
@After @After
@ -138,54 +150,63 @@ public class TestGpuResourceHandlerImpl {
} }
@Test @Test
public void testBootStrap() throws Exception { public void testBootstrapWithRealGpuDiscoverer() throws Exception {
Configuration conf = createDefaultConfig(); Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0"); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0");
gpuDiscoverer.initialize(conf); gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf); gpuResourceHandler.bootstrap(conf);
verify(mockCGroupsHandler, times(1)).initializeCGroupController(
List<GpuDevice> allowedGpus =
gpuResourceHandler.getGpuAllocator().getAllowedGpusCopy();
assertEquals("Unexpected number of allowed GPU devices!", 1,
allowedGpus.size());
assertEquals("Expected GPU device does not equal to found device!",
new GpuDevice(0, 0), allowedGpus.get(0));
verify(mockCGroupsHandler).initializeCGroupController(
CGroupsHandler.CGroupController.DEVICES); CGroupsHandler.CGroupController.DEVICES);
} }
/**
 * Verifies that bootstrapping fails when the GPU discoverer handed to the
 * resource handler does not report any usable GPU device.
 *
 * <p>The handler built in {@code setup()} wraps the real
 * {@code GpuDiscoverer}, so a dedicated handler is created around the mock
 * here; otherwise the mock would never be consulted by {@code bootstrap()}.
 *
 * @throws Exception the expected {@code ResourceHandlerException}, which is
 *     checked by the {@code expected} rule
 */
@Test
public void testBootstrapWithMockGpuDiscoverer() throws Exception {
  GpuDiscoverer mockDiscoverer = mock(GpuDiscoverer.class);
  Configuration conf = new YarnConfiguration();
  // An unstubbed mock answers with Mockito's defaults, so it offers no GPU
  // devices; calling initialize() on it would be a no-op and is omitted.
  gpuResourceHandler = new GpuResourceHandlerImpl(createMockNmContext(conf),
      mockCGroupsHandler, mockPrivilegedExecutor, mockDiscoverer);

  expected.expect(ResourceHandlerException.class);
  gpuResourceHandler.bootstrap(conf);
}
private static ContainerId getContainerId(int id) { private static ContainerId getContainerId(int id) {
return ContainerId.newContainerId(ApplicationAttemptId return ContainerId.newContainerId(ApplicationAttemptId
.newInstance(ApplicationId.newInstance(1234L, 1), 1), id); .newInstance(ApplicationId.newInstance(1234L, 1), 1), id);
} }
private static Container mockContainerWithGpuRequest(int id, int numGpuRequest, private static Container mockContainerWithGpuRequest(int id, Resource res,
boolean dockerContainerEnabled) { ContainerLaunchContext launchContext) {
Container c = mock(Container.class); Container c = mock(Container.class);
when(c.getContainerId()).thenReturn(getContainerId(id)); when(c.getContainerId()).thenReturn(getContainerId(id));
Resource res = Resource.newInstance(1024, 1);
ResourceMappings resMapping = new ResourceMappings();
res.setResourceValue(ResourceInformation.GPU_URI, numGpuRequest);
when(c.getResource()).thenReturn(res); when(c.getResource()).thenReturn(res);
when(c.getResourceMappings()).thenReturn(resMapping); when(c.getResourceMappings()).thenReturn(new ResourceMappings());
when(c.getLaunchContext()).thenReturn(launchContext);
ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
Map<String, String> env = new HashMap<>();
if (dockerContainerEnabled) {
env.put(ContainerRuntimeConstants.ENV_CONTAINER_TYPE,
ContainerRuntimeConstants.CONTAINER_RUNTIME_DOCKER);
}
when(clc.getEnvironment()).thenReturn(env);
when(c.getLaunchContext()).thenReturn(clc);
return c; return c;
} }
/**
 * Builds the resource ask used by the test containers.
 *
 * @param numGpuRequest value stored under {@code ResourceInformation.GPU_URI}
 * @return a resource created with {@code Resource.newInstance(1024, 1)} that
 *     additionally carries the requested GPU value
 */
private static Resource createResourceRequest(int numGpuRequest) {
  final Resource gpuRequest = Resource.newInstance(1024, 1);
  gpuRequest.setResourceValue(ResourceInformation.GPU_URI, numGpuRequest);
  return gpuRequest;
}
private static Container mockContainerWithGpuRequest(int id, private static Container mockContainerWithGpuRequest(int id,
int numGpuRequest) { Resource res) {
return mockContainerWithGpuRequest(id, numGpuRequest, false); return mockContainerWithGpuRequest(id, res, createLaunchContext());
} }
private void verifyDeniedDevices(ContainerId containerId, private void verifyDeniedDevices(ContainerId containerId,
List<GpuDevice> deniedDevices) List<GpuDevice> deniedDevices)
throws ResourceHandlerException, PrivilegedOperationException { throws ResourceHandlerException, PrivilegedOperationException {
verify(mockCGroupsHandler, times(1)).createCGroup( verify(mockCGroupsHandler).createCGroup(
CGroupsHandler.CGroupController.DEVICES, containerId.toString()); CGroupsHandler.CGroupController.DEVICES, containerId.toString());
if (null != deniedDevices && !deniedDevices.isEmpty()) { if (null != deniedDevices && !deniedDevices.isEmpty()) {
@ -193,7 +214,7 @@ public class TestGpuResourceHandlerImpl {
for (GpuDevice deniedDevice : deniedDevices) { for (GpuDevice deniedDevice : deniedDevices) {
deniedDevicesMinorNumber.add(deniedDevice.getMinorNumber()); deniedDevicesMinorNumber.add(deniedDevice.getMinorNumber());
} }
verify(mockPrivilegedExecutor, times(1)).executePrivilegedOperation( verify(mockPrivilegedExecutor).executePrivilegedOperation(
new PrivilegedOperation(PrivilegedOperation.OperationType.GPU, Arrays new PrivilegedOperation(PrivilegedOperation.OperationType.GPU, Arrays
.asList(GpuResourceHandlerImpl.CONTAINER_ID_CLI_OPTION, .asList(GpuResourceHandlerImpl.CONTAINER_ID_CLI_OPTION,
containerId.toString(), containerId.toString(),
@ -202,104 +223,139 @@ public class TestGpuResourceHandlerImpl {
} }
} }
private void commonTestAllocation(boolean dockerContainerEnabled) private static ContainerLaunchContext createLaunchContextDocker() {
throws Exception { ContainerLaunchContext launchContext = mock(ContainerLaunchContext.class);
ImmutableMap<String, String> env = ImmutableMap.<String, String>builder()
.put(ContainerRuntimeConstants.ENV_CONTAINER_TYPE,
ContainerRuntimeConstants.CONTAINER_RUNTIME_DOCKER)
.build();
when(launchContext.getEnvironment()).thenReturn(env);
return launchContext;
}
/**
 * Creates a mocked launch context whose environment is an empty map, i.e.
 * one without the Docker container-type entry.
 *
 * @return the mocked {@code ContainerLaunchContext}
 */
private static ContainerLaunchContext createLaunchContext() {
  final Map<String, String> emptyEnvironment = Maps.newHashMap();
  final ContainerLaunchContext plainContext =
      mock(ContainerLaunchContext.class);
  when(plainContext.getEnvironment()).thenReturn(emptyEnvironment);
  return plainContext;
}
/**
 * Runs {@code preStart} on the handler under test for a mocked container
 * that uses the Docker launch context.
 *
 * @param id numeric id of the mocked container
 * @param gpus GPU value put into the container's resource request
 * @throws ResourceHandlerException if {@code preStart} fails
 */
private void startContainerWithGpuRequestsDocker(int id, int gpus)
    throws ResourceHandlerException {
  final Resource gpuRequest = createResourceRequest(gpus);
  final ContainerLaunchContext dockerContext = createLaunchContextDocker();
  final Container dockerContainer =
      mockContainerWithGpuRequest(id, gpuRequest, dockerContext);
  gpuResourceHandler.preStart(dockerContainer);
}
/**
 * Runs {@code preStart} on the handler under test for a mocked container
 * that uses the plain (empty-environment) launch context.
 *
 * @param id numeric id of the mocked container
 * @param gpus GPU value put into the container's resource request
 * @throws ResourceHandlerException if {@code preStart} fails
 */
private void startContainerWithGpuRequests(int id, int gpus)
    throws ResourceHandlerException {
  final Resource gpuRequest = createResourceRequest(gpus);
  final ContainerLaunchContext plainContext = createLaunchContext();
  final Container plainContainer =
      mockContainerWithGpuRequest(id, gpuRequest, plainContext);
  gpuResourceHandler.preStart(plainContainer);
}
/**
 * Asserts how many GPU devices the allocator of the given handler reports
 * as available.
 *
 * @param expectedAvailable expected value of {@code getAvailableGpus()}
 * @param resourceHandler handler whose GPU allocator is queried
 */
private void verifyNumberOfAvailableGpus(int expectedAvailable,
    GpuResourceHandlerImpl resourceHandler) {
  final String failureMessage = "Unexpected number of available GPU devices!";
  assertEquals(
      failureMessage,
      expectedAvailable,
      resourceHandler.getGpuAllocator().getAvailableGpus());
}
/**
 * Verifies that the devices cgroup of the given container was deleted
 * through the mocked cgroups handler.
 *
 * @param i numeric id of the container, resolved via {@code getContainerId}
 * @throws ResourceHandlerException declared by the mocked handler method
 */
private void verifyCgroupsDeletedForContainer(int i)
    throws ResourceHandlerException {
  // The deletion is what has to be checked after postComplete; the creation
  // of the cgroup is already verified by verifyDeniedDevices().
  verify(mockCGroupsHandler).deleteCGroup(
      CGroupsHandler.CGroupController.DEVICES, getContainerId(i).toString());
}
private void initializeGpus() throws YarnException, IOException {
Configuration conf = createDefaultConfig(); Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer = new GpuDiscoverer();
gpuDiscoverer.initialize(conf); gpuDiscoverer.initialize(conf);
Context nmContext = createMockNmContext(conf);
gpuResourceHandler = new GpuResourceHandlerImpl(nmContext,
mockCGroupsHandler, mockPrivilegedExecutor, gpuDiscoverer);
gpuResourceHandler.bootstrap(conf); gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4, verifyNumberOfAvailableGpus(4, gpuResourceHandler);
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
/* Start container 1, asks 3 containers */
gpuResourceHandler.preStart(
mockContainerWithGpuRequest(1, 3, dockerContainerEnabled));
// Only device=4 will be blocked.
if (dockerContainerEnabled) {
verifyDeniedDevices(getContainerId(1), Collections.emptyList());
} else{
verifyDeniedDevices(getContainerId(1), Arrays.asList(new GpuDevice(3,4)));
}
/* Start container 2, asks 2 containers. Excepted to fail */
boolean failedToAllocate = false;
try {
gpuResourceHandler.preStart(
mockContainerWithGpuRequest(2, 2, dockerContainerEnabled));
} catch (ResourceHandlerException e) {
failedToAllocate = true;
}
Assert.assertTrue(failedToAllocate);
/* Start container 3, ask 1 container, succeeded */
gpuResourceHandler.preStart(
mockContainerWithGpuRequest(3, 1, dockerContainerEnabled));
// devices = 0/1/3 will be blocked
if (dockerContainerEnabled) {
verifyDeniedDevices(getContainerId(3), Collections.emptyList());
} else {
verifyDeniedDevices(getContainerId(3), Arrays
.asList(new GpuDevice(0, 0), new GpuDevice(1, 1),
new GpuDevice(2, 3)));
}
/* Start container 4, ask 0 container, succeeded */
gpuResourceHandler.preStart(
mockContainerWithGpuRequest(4, 0, dockerContainerEnabled));
if (dockerContainerEnabled) {
verifyDeniedDevices(getContainerId(4), Collections.emptyList());
} else{
// All devices will be blocked
verifyDeniedDevices(getContainerId(4), Arrays
.asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3),
new GpuDevice(3, 4)));
}
/* Release container-1, expect cgroups deleted */
gpuResourceHandler.postComplete(getContainerId(1));
verify(mockCGroupsHandler, times(1)).createCGroup(
CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString());
Assert.assertEquals(3,
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
/* Release container-3, expect cgroups deleted */
gpuResourceHandler.postComplete(getContainerId(3));
verify(mockCGroupsHandler, times(1)).createCGroup(
CGroupsHandler.CGroupController.DEVICES, getContainerId(3).toString());
Assert.assertEquals(4,
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
} }
@Test @Test
public void testAllocationWhenDockerContainerEnabled() throws Exception { public void testAllocationWhenDockerContainerEnabled() throws Exception {
// When docker container is enabled, no devices should be written to // When docker container is enabled, no devices should be written to
// devices.deny. // devices.deny.
commonTestAllocation(true); initializeGpus();
startContainerWithGpuRequestsDocker(1, 3);
verifyDeniedDevices(getContainerId(1), Collections.emptyList());
/* Start container 2, asks for 2 GPUs. Expected to fail */
boolean failedToAllocate = false;
try {
startContainerWithGpuRequestsDocker(2, 2);
} catch (ResourceHandlerException e) {
failedToAllocate = true;
}
assertTrue("Container allocation is expected to fail!", failedToAllocate);
startContainerWithGpuRequestsDocker(3, 1);
verifyDeniedDevices(getContainerId(3), Collections.emptyList());
startContainerWithGpuRequestsDocker(4, 0);
verifyDeniedDevices(getContainerId(4), Collections.emptyList());
gpuResourceHandler.postComplete(getContainerId(1));
verifyCgroupsDeletedForContainer(1);
verifyNumberOfAvailableGpus(3, gpuResourceHandler);
gpuResourceHandler.postComplete(getContainerId(3));
verifyCgroupsDeletedForContainer(3);
verifyNumberOfAvailableGpus(4, gpuResourceHandler);
} }
@Test @Test
public void testAllocation() throws Exception { public void testAllocation() throws Exception {
commonTestAllocation(false); initializeGpus();
//Start container 1, asks 3 containers --> Only device=4 will be blocked.
startContainerWithGpuRequests(1, 3);
verifyDeniedDevices(getContainerId(1),
Collections.singletonList(new GpuDevice(3, 4)));
/* Start container 2, asks for 2 GPUs. Expected to fail */
boolean failedToAllocate = false;
try {
startContainerWithGpuRequests(2, 2);
} catch (ResourceHandlerException e) {
failedToAllocate = true;
}
assertTrue("Container allocation is expected to fail!", failedToAllocate);
// Start container 3, ask 1 container, succeeded
// devices = 0/1/3 will be blocked
startContainerWithGpuRequests(3, 1);
verifyDeniedDevices(getContainerId(3), Arrays.asList(new GpuDevice(0, 0),
new GpuDevice(1, 1), new GpuDevice(2, 3)));
// Start container 4, ask 0 container, succeeded
// --> All devices will be blocked
startContainerWithGpuRequests(4, 0);
verifyDeniedDevices(getContainerId(4), Arrays.asList(new GpuDevice(0, 0),
new GpuDevice(1, 1), new GpuDevice(2, 3), new GpuDevice(3, 4)));
gpuResourceHandler.postComplete(getContainerId(1));
verifyCgroupsDeletedForContainer(1);
verifyNumberOfAvailableGpus(3, gpuResourceHandler);
gpuResourceHandler.postComplete(getContainerId(3));
verifyCgroupsDeletedForContainer(3);
verifyNumberOfAvailableGpus(4, gpuResourceHandler);
} }
@SuppressWarnings("unchecked")
@Test @Test
public void testAssignedGpuWillBeCleanedupWhenStoreOpFails() public void testAssignedGpuWillBeCleanedUpWhenStoreOpFails()
throws Exception { throws Exception {
Configuration conf = createDefaultConfig(); initializeGpus();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
doThrow(new IOException("Exception ...")).when(mockNMStateStore) doThrow(new IOException("Exception ...")).when(mockNMStateStore)
.storeAssignedResources( .storeAssignedResources(
@ -308,16 +364,16 @@ public class TestGpuResourceHandlerImpl {
boolean exception = false; boolean exception = false;
/* Start container 1, asks 3 containers */ /* Start container 1, asks 3 containers */
try { try {
gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 3)); gpuResourceHandler.preStart(mockContainerWithGpuRequest(1,
createResourceRequest(3)));
} catch (ResourceHandlerException e) { } catch (ResourceHandlerException e) {
exception = true; exception = true;
} }
Assert.assertTrue("preStart should throw exception", exception); assertTrue("preStart should throw exception", exception);
// After preStart, we still have 4 available GPU since the store op fails. // After preStart, we still have 4 available GPU since the store op failed.
Assert.assertEquals(4, verifyNumberOfAvailableGpus(4, gpuResourceHandler);
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
} }
@Test @Test
@ -328,45 +384,40 @@ public class TestGpuResourceHandlerImpl {
try { try {
gpuResourceHandler.bootstrap(conf); gpuResourceHandler.bootstrap(conf);
Assert.fail("Should fail because no GPU available"); fail("Should fail because no GPU available");
} catch (ResourceHandlerException e) { } catch (ResourceHandlerException e) {
// Expected because of no resource available // Expected because of no resource available
} }
/* Start container 1, asks 0 containers */ /* Start container 1, asks 0 containers */
gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 0)); gpuResourceHandler.preStart(mockContainerWithGpuRequest(1,
createResourceRequest(0)));
verifyDeniedDevices(getContainerId(1), Collections.emptyList()); verifyDeniedDevices(getContainerId(1), Collections.emptyList());
/* Start container 2, asks for 1 GPU. Expected to fail */ /* Start container 2, asks for 1 GPU. Expected to fail */
boolean failedToAllocate = false; boolean failedToAllocate = false;
try { try {
gpuResourceHandler.preStart(mockContainerWithGpuRequest(2, 1)); gpuResourceHandler.preStart(mockContainerWithGpuRequest(2,
createResourceRequest(1)));
} catch (ResourceHandlerException e) { } catch (ResourceHandlerException e) {
failedToAllocate = true; failedToAllocate = true;
} }
Assert.assertTrue(failedToAllocate); assertTrue("Container allocation is expected to fail!", failedToAllocate);
/* Release container 1, expect cgroups deleted */ /* Release container 1, expect cgroups deleted */
gpuResourceHandler.postComplete(getContainerId(1)); gpuResourceHandler.postComplete(getContainerId(1));
verify(mockCGroupsHandler, times(1)).createCGroup( verifyCgroupsDeletedForContainer(1);
CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString()); verifyNumberOfAvailableGpus(0, gpuResourceHandler);
Assert.assertEquals(0,
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
} }
@Test @Test
public void testAllocationStored() throws Exception { public void testAllocationStored() throws Exception {
Configuration conf = createDefaultConfig(); initializeGpus();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
/* Start container 1, asks 3 containers */ /* Start container 1, asks 3 containers */
Container container = mockContainerWithGpuRequest(1, 3); Container container = mockContainerWithGpuRequest(1,
createResourceRequest(3));
gpuResourceHandler.preStart(container); gpuResourceHandler.preStart(container);
verify(mockNMStateStore).storeAssignedResources(container, verify(mockNMStateStore).storeAssignedResources(container,
@ -375,16 +426,18 @@ public class TestGpuResourceHandlerImpl {
new GpuDevice(2, 3))); new GpuDevice(2, 3)));
// Only device=4 will be blocked. // Only device=4 will be blocked.
verifyDeniedDevices(getContainerId(1), Arrays.asList(new GpuDevice(3, 4))); verifyDeniedDevices(getContainerId(1),
Collections.singletonList(new GpuDevice(3, 4)));
/* Start container 2, ask 0 container, succeeded */ /* Start container 2, ask 0 container, succeeded */
container = mockContainerWithGpuRequest(2, 0); container = mockContainerWithGpuRequest(2, createResourceRequest(0));
gpuResourceHandler.preStart(container); gpuResourceHandler.preStart(container);
verifyDeniedDevices(getContainerId(2), Arrays verifyDeniedDevices(getContainerId(2), Arrays
.asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3), .asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3),
new GpuDevice(3, 4))); new GpuDevice(3, 4)));
Assert.assertEquals(0, container.getResourceMappings() assertEquals("Number of GPU device allocations is not the expected!", 0,
container.getResourceMappings()
.getAssignedResources(ResourceInformation.GPU_URI).size()); .getAssignedResources(ResourceInformation.GPU_URI).size());
// Store assigned resource will not be invoked. // Store assigned resource will not be invoked.
@ -394,7 +447,8 @@ public class TestGpuResourceHandlerImpl {
@Test @Test
public void testAllocationStoredWithNULLStateStore() throws Exception { public void testAllocationStoredWithNULLStateStore() throws Exception {
NMNullStateStoreService mockNMNULLStateStore = mock(NMNullStateStoreService.class); NMNullStateStoreService mockNMNULLStateStore =
mock(NMNullStateStoreService.class);
Configuration conf = createDefaultConfig(); Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
@ -410,11 +464,11 @@ public class TestGpuResourceHandlerImpl {
gpuDiscoverer.initialize(conf); gpuDiscoverer.initialize(conf);
gpuNULLStateResourceHandler.bootstrap(conf); gpuNULLStateResourceHandler.bootstrap(conf);
Assert.assertEquals(4, verifyNumberOfAvailableGpus(4, gpuNULLStateResourceHandler);
gpuNULLStateResourceHandler.getGpuAllocator().getAvailableGpus());
/* Start container 1, asks 3 containers */ /* Start container 1, asks 3 containers */
Container container = mockContainerWithGpuRequest(1, 3); Container container = mockContainerWithGpuRequest(1,
createResourceRequest(3));
gpuNULLStateResourceHandler.preStart(container); gpuNULLStateResourceHandler.preStart(container);
verify(nmnctx.getNMStateStore()).storeAssignedResources(container, verify(nmnctx.getNMStateStore()).storeAssignedResources(container,
@ -425,13 +479,7 @@ public class TestGpuResourceHandlerImpl {
@Test @Test
public void testRecoverResourceAllocation() throws Exception { public void testRecoverResourceAllocation() throws Exception {
Configuration conf = createDefaultConfig(); initializeGpus();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
Container nmContainer = mock(Container.class); Container nmContainer = mock(Container.class);
ResourceMappings rmap = new ResourceMappings(); ResourceMappings rmap = new ResourceMappings();
@ -450,12 +498,14 @@ public class TestGpuResourceHandlerImpl {
Map<GpuDevice, ContainerId> deviceAllocationMapping = Map<GpuDevice, ContainerId> deviceAllocationMapping =
gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy(); gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy();
Assert.assertEquals(2, deviceAllocationMapping.size()); assertEquals("Unexpected number of allocated GPU devices!", 2,
Assert.assertTrue( deviceAllocationMapping.size());
assertTrue("Expected GPU device is not found in allocations!",
deviceAllocationMapping.keySet().contains(new GpuDevice(1, 1))); deviceAllocationMapping.keySet().contains(new GpuDevice(1, 1)));
Assert.assertTrue( assertTrue("Expected GPU device is not found in allocations!",
deviceAllocationMapping.keySet().contains(new GpuDevice(2, 3))); deviceAllocationMapping.keySet().contains(new GpuDevice(2, 3)));
Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)), assertEquals("GPU device is not assigned to the expected container!",
deviceAllocationMapping.get(new GpuDevice(1, 1)),
getContainerId(1)); getContainerId(1));
// TEST CASE // TEST CASE
@ -477,17 +527,20 @@ public class TestGpuResourceHandlerImpl {
} catch (ResourceHandlerException e) { } catch (ResourceHandlerException e) {
caughtException = true; caughtException = true;
} }
Assert.assertTrue( assertTrue(
"Should fail since requested device Id is not in allowed list", "Should fail since requested device Id is not in allowed list",
caughtException); caughtException);
// Make sure internal state not changed. // Make sure internal state not changed.
deviceAllocationMapping = deviceAllocationMapping =
gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy(); gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy();
Assert.assertEquals(2, deviceAllocationMapping.size()); assertEquals("Unexpected number of allocated GPU devices!",
Assert.assertTrue(deviceAllocationMapping.keySet() 2, deviceAllocationMapping.size());
assertTrue("Expected GPU devices are not found in allocations!",
deviceAllocationMapping.keySet()
.containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3)))); .containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3))));
Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)), assertEquals("GPU device is not assigned to the expected container!",
deviceAllocationMapping.get(new GpuDevice(1, 1)),
getContainerId(1)); getContainerId(1));
// TEST CASE // TEST CASE
@ -509,17 +562,20 @@ public class TestGpuResourceHandlerImpl {
} catch (ResourceHandlerException e) { } catch (ResourceHandlerException e) {
caughtException = true; caughtException = true;
} }
Assert.assertTrue( assertTrue(
"Should fail since requested device Id is already assigned", "Should fail since requested device Id is already assigned",
caughtException); caughtException);
// Make sure internal state not changed. // Make sure internal state not changed.
deviceAllocationMapping = deviceAllocationMapping =
gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy(); gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy();
Assert.assertEquals(2, deviceAllocationMapping.size()); assertEquals("Unexpected number of allocated GPU devices!",
Assert.assertTrue(deviceAllocationMapping.keySet() 2, deviceAllocationMapping.size());
assertTrue("Expected GPU devices are not found in allocations!",
deviceAllocationMapping.keySet()
.containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3)))); .containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3))));
Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)), assertEquals("GPU device is not assigned to the expected container!",
deviceAllocationMapping.get(new GpuDevice(1, 1)),
getContainerId(1)); getContainerId(1));
} }
} }