YARN-7637. GPU volume creation command fails when work preserving is disabled at NM. Contributed by Zian Chen.

This commit is contained in:
Sunil G 2018-03-01 22:39:32 +05:30
parent 6ca4e3017b
commit 22928c0c60
2 changed files with 31 additions and 0 deletions

View File

@ -272,6 +272,7 @@ public class NMNullStateStoreService extends NMStateStoreService {
public void storeAssignedResources(Container container, public void storeAssignedResources(Container container,
String resourceType, List<Serializable> assignedResources) String resourceType, List<Serializable> assignedResources)
throws IOException { throws IOException {
updateContainerResourceMapping(container, resourceType, assignedResources);
} }
@Override @Override

View File

@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resource
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.util.resource.TestResourceUtils; import org.apache.hadoop.yarn.util.resource.TestResourceUtils;
import org.junit.Assert; import org.junit.Assert;
@ -342,6 +343,35 @@ public class TestGpuResourceHandler {
eq(container), eq(ResourceInformation.GPU_URI), anyList()); eq(container), eq(ResourceInformation.GPU_URI), anyList());
} }
/**
 * Checks that the GPUs assigned to a container in preStart are passed to
 * storeAssignedResources when the NM context returns an
 * NMNullStateStoreService as its state store.
 */
@Test
public void testAllocationStoredWithNULLStateStore() throws Exception {
  // Configuration listing four allowed GPU devices.
  Configuration conf = new YarnConfiguration();
  conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");

  // NM context whose state store is a mocked NMNullStateStoreService.
  NMNullStateStoreService nullStateStore =
      mock(NMNullStateStoreService.class);
  Context nmContext = mock(Context.class);
  when(nmContext.getNMStateStore()).thenReturn(nullStateStore);

  // The discoverer must be initialized before the handler is bootstrapped.
  GpuResourceHandlerImpl handler = new GpuResourceHandlerImpl(
      nmContext, mockCGroupsHandler, mockPrivilegedExecutor);
  GpuDiscoverer.getInstance().initialize(conf);
  handler.bootstrap(conf);
  Assert.assertEquals(4, handler.getGpuAllocator().getAvailableGpus());

  /* Start container 1, which asks for 3 GPUs */
  Container container = mockContainerWithGpuRequest(1, 3);
  handler.preStart(container);

  // nullStateStore is the same object the stubbed getNMStateStore() returns.
  verify(nullStateStore).storeAssignedResources(
      container,
      ResourceInformation.GPU_URI,
      Arrays.asList(
          new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3)));
}
@Test @Test
public void testRecoverResourceAllocation() throws Exception { public void testRecoverResourceAllocation() throws Exception {
Configuration conf = new YarnConfiguration(); Configuration conf = new YarnConfiguration();