From 22928c0c60de3ccbd6b27ac3a410e7566ce4136b Mon Sep 17 00:00:00 2001 From: Sunil G Date: Thu, 1 Mar 2018 22:39:32 +0530 Subject: [PATCH] YARN-7637. GPU volume creation command fails when work preserving is disabled at NM. Contributed by Zian Chen. --- .../recovery/NMNullStateStoreService.java | 1 + .../resources/gpu/TestGpuResourceHandler.java | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java index 2d522a9b6fa..78137bba257 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java @@ -272,6 +272,7 @@ public void removeAMRMProxyAppContext(ApplicationAttemptId attempt) public void storeAssignedResources(Container container, String resourceType, List assignedResources) throws IOException { + updateContainerResourceMapping(container, resourceType, assignedResources); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java index 87ab249a37b..9a8a4c9f284 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants; +import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.util.resource.TestResourceUtils; import org.junit.Assert; @@ -342,6 +343,35 @@ public void testAllocationStored() throws Exception { eq(container), eq(ResourceInformation.GPU_URI), anyList()); } + @Test + public void testAllocationStoredWithNULLStateStore() throws Exception { + NMNullStateStoreService mockNMNULLStateStore = mock(NMNullStateStoreService.class); + + Context nmnctx = mock(Context.class); + when(nmnctx.getNMStateStore()).thenReturn(mockNMNULLStateStore); + + GpuResourceHandlerImpl gpuNULLStateResourceHandler = + new GpuResourceHandlerImpl(nmnctx, mockCGroupsHandler, + mockPrivilegedExecutor); + + Configuration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); + GpuDiscoverer.getInstance().initialize(conf); + + gpuNULLStateResourceHandler.bootstrap(conf); + Assert.assertEquals(4, + gpuNULLStateResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Start container 1, which asks for 3 GPUs */ + Container container = mockContainerWithGpuRequest(1, 3); + 
gpuNULLStateResourceHandler.preStart(container); + + verify(nmnctx.getNMStateStore()).storeAssignedResources(container, + ResourceInformation.GPU_URI, Arrays + .asList(new GpuDevice(0, 0), new GpuDevice(1, 1), + new GpuDevice(2, 3))); + } + @Test public void testRecoverResourceAllocation() throws Exception { Configuration conf = new YarnConfiguration();