YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled
Contributed by Szilard Nemeth.
This commit is contained in:
parent
e578e52aae
commit
825db8fe2a
@@ -136,7 +136,7 @@ public synchronized GpuDeviceInformation getGpuDeviceInformation()
|
|||||||
return lastDiscoveredGpuInformation;
|
return lastDiscoveredGpuInformation;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isAutoDiscoveryEnabled() {
|
boolean isAutoDiscoveryEnabled() {
|
||||||
String allowedDevicesStr = getConf().get(
|
String allowedDevicesStr = getConf().get(
|
||||||
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
|
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
|
||||||
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
|
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
|
||||||
|
@@ -94,27 +94,29 @@ public DockerCommandPlugin getDockerCommandPluginInstance() {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
|
public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
|
||||||
GpuDeviceInformation gpuDeviceInformation;
|
final GpuDeviceInformation gpuDeviceInformation;
|
||||||
|
|
||||||
//At this point the gpu plugin is already enabled
|
if (gpuDiscoverer.isAutoDiscoveryEnabled()) {
|
||||||
checkGpuResourceHandler();
|
//At this point the gpu plugin is already enabled
|
||||||
|
checkGpuResourceHandler();
|
||||||
|
|
||||||
checkErrorCount();
|
checkErrorCount();
|
||||||
try{
|
try{
|
||||||
gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
|
gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
|
||||||
numOfErrorExecutionSinceLastSucceed = 0;
|
numOfErrorExecutionSinceLastSucceed = 0;
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
LOG.error(e.getMessage(), e);
|
LOG.error(e.getMessage(), e);
|
||||||
numOfErrorExecutionSinceLastSucceed++;
|
numOfErrorExecutionSinceLastSucceed++;
|
||||||
throw e;
|
throw e;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
gpuDeviceInformation = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
GpuResourceAllocator gpuResourceAllocator =
|
GpuResourceAllocator gpuResourceAllocator =
|
||||||
gpuResourceHandler.getGpuAllocator();
|
gpuResourceHandler.getGpuAllocator();
|
||||||
List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpus();
|
List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpus();
|
||||||
List<AssignedGpuDevice> assignedGpuDevices =
|
List<AssignedGpuDevice> assignedGpuDevices =
|
||||||
gpuResourceAllocator.getAssignedGpus();
|
gpuResourceAllocator.getAssignedGpus();
|
||||||
|
|
||||||
return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus,
|
return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus,
|
||||||
assignedGpuDevices);
|
assignedGpuDevices);
|
||||||
}
|
}
|
||||||
|
@@ -19,15 +19,38 @@
|
|||||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class TestGpuResourcePlugin {
|
public class TestGpuResourcePlugin {
|
||||||
|
|
||||||
|
private GpuDiscoverer createMockDiscoverer() throws YarnException {
|
||||||
|
GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
|
||||||
|
when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(true);
|
||||||
|
|
||||||
|
PerGpuDeviceInformation gpu =
|
||||||
|
new PerGpuDeviceInformation();
|
||||||
|
gpu.setProductName("testGpu");
|
||||||
|
List<PerGpuDeviceInformation> gpus = Lists.newArrayList();
|
||||||
|
gpus.add(gpu);
|
||||||
|
|
||||||
|
GpuDeviceInformation gpuDeviceInfo = new GpuDeviceInformation();
|
||||||
|
gpuDeviceInfo.setGpus(gpus);
|
||||||
|
when(gpuDiscoverer.getGpuDeviceInformation()).thenReturn(gpuDeviceInfo);
|
||||||
|
return gpuDiscoverer;
|
||||||
|
}
|
||||||
|
|
||||||
@Test(expected = YarnException.class)
|
@Test(expected = YarnException.class)
|
||||||
public void testResourceHandlerNotInitialized() throws YarnException {
|
public void testResourceHandlerNotInitialized() throws YarnException {
|
||||||
GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
|
GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
|
||||||
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
||||||
mock(GpuNodeResourceUpdateHandler.class);
|
mock(GpuNodeResourceUpdateHandler.class);
|
||||||
|
|
||||||
@@ -39,7 +62,7 @@ public void testResourceHandlerNotInitialized() throws YarnException {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testResourceHandlerIsInitialized() throws YarnException {
|
public void testResourceHandlerIsInitialized() throws YarnException {
|
||||||
GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
|
GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
|
||||||
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
||||||
mock(GpuNodeResourceUpdateHandler.class);
|
mock(GpuNodeResourceUpdateHandler.class);
|
||||||
|
|
||||||
@@ -51,4 +74,52 @@ public void testResourceHandlerIsInitialized() throws YarnException {
|
|||||||
//Not throwing any exception
|
//Not throwing any exception
|
||||||
target.getNMResourceInfo();
|
target.getNMResourceInfo();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetNMResourceInfoAutoDiscoveryEnabled()
|
||||||
|
throws YarnException {
|
||||||
|
GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
|
||||||
|
|
||||||
|
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
||||||
|
mock(GpuNodeResourceUpdateHandler.class);
|
||||||
|
|
||||||
|
GpuResourcePlugin target =
|
||||||
|
new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
|
||||||
|
|
||||||
|
target.createResourceHandler(null, null, null);
|
||||||
|
|
||||||
|
NMGpuResourceInfo resourceInfo =
|
||||||
|
(NMGpuResourceInfo) target.getNMResourceInfo();
|
||||||
|
Assert.assertNotNull("GpuDeviceInformation should not be null",
|
||||||
|
resourceInfo.getGpuDeviceInformation());
|
||||||
|
|
||||||
|
List<PerGpuDeviceInformation> gpus =
|
||||||
|
resourceInfo.getGpuDeviceInformation().getGpus();
|
||||||
|
Assert.assertNotNull("List of PerGpuDeviceInformation should not be null",
|
||||||
|
gpus);
|
||||||
|
|
||||||
|
Assert.assertEquals("List of PerGpuDeviceInformation should have a " +
|
||||||
|
"size of 1", 1, gpus.size());
|
||||||
|
Assert.assertEquals("Product name of GPU does not match",
|
||||||
|
"testGpu", gpus.get(0).getProductName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetNMResourceInfoAutoDiscoveryDisabled()
|
||||||
|
throws YarnException {
|
||||||
|
GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
|
||||||
|
when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(false);
|
||||||
|
|
||||||
|
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
||||||
|
mock(GpuNodeResourceUpdateHandler.class);
|
||||||
|
|
||||||
|
GpuResourcePlugin target =
|
||||||
|
new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
|
||||||
|
|
||||||
|
target.createResourceHandler(null, null, null);
|
||||||
|
|
||||||
|
NMGpuResourceInfo resourceInfo =
|
||||||
|
(NMGpuResourceInfo) target.getNMResourceInfo();
|
||||||
|
Assert.assertNull(resourceInfo.getGpuDeviceInformation());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user