YARN-9337. GPU auto-discovery script runs even when the resource is given by hand. Contributed by Adam Antal

(cherry picked from commit 61b0c2bb7c)
This commit is contained in:
Szilard Nemeth 2019-07-12 17:28:14 +02:00
parent c61c969668
commit 0ede873090
2 changed files with 52 additions and 25 deletions

View File

@ -69,6 +69,8 @@ public class GpuDiscoverer {
// Presumably counts failed discovery executions since the last successful one
// (inferred from the name — confirm against the code that increments it);
// initialize() sets it back to 0 when auto-discovery is enabled.
private int numOfErrorExecutionSinceLastSucceed = 0;
// NOTE(review): looks like the most recent discovery result; the code that
// assigns it is not visible here — verify before relying on this.
private GpuDeviceInformation lastDiscoveredGpuInformation = null;
// Devices parsed from the user-defined allowed-devices setting. Stays null
// until the first lookup with auto-discovery disabled, then is reused.
private List<GpuDevice> gpuDevicesFromUser;
private void validateConfOrThrowException() throws YarnException {
if (conf == null) {
throw new YarnException("Please initialize (call initialize) before use "
@ -143,6 +145,14 @@ public class GpuDiscoverer {
}
}
/**
 * Reports whether the GPU devices are to be discovered automatically rather
 * than taken from an explicit, user-supplied device list.
 *
 * <p>The value of {@link YarnConfiguration#NM_GPU_ALLOWED_DEVICES} is looked
 * up with {@link YarnConfiguration#AUTOMATICALLY_DISCOVER_GPU_DEVICES} passed
 * as the default, and the result is compared against that same marker.
 *
 * @return {@code true} if the effective allowed-devices value equals the
 *         auto-discovery marker, {@code false} otherwise
 */
private boolean IsAutoDiscoveryEnabled() {
  final String autoDiscoveryMarker =
      YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES;
  final String configuredDevices = conf.get(
      YarnConfiguration.NM_GPU_ALLOWED_DEVICES, autoDiscoveryMarker);
  return configuredDevices.equals(autoDiscoveryMarker);
}
/**
* Get list of GPU devices usable by YARN.
*
@ -153,15 +163,13 @@ public class GpuDiscoverer {
throws YarnException {
validateConfOrThrowException();
String allowedDevicesStr = conf.get(
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
if (allowedDevicesStr.equals(
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
if (IsAutoDiscoveryEnabled()) {
return parseGpuDevicesFromAutoDiscoveredGpuInfo();
} else {
return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr);
if (gpuDevicesFromUser == null) {
gpuDevicesFromUser = parseGpuDevicesFromUserDefinedValues();
}
return gpuDevicesFromUser;
}
}
@ -193,16 +201,16 @@ public class GpuDiscoverer {
}
/**
* @param devices allowed devices coming from the config.
* Individual devices should be separated by commas.
* <br>The format of individual devices should be:
* &lt;index:&gt;&lt;minorNumber&gt;
* @return List of GpuDevices
* @throws YarnException when a GPU device is defined as a duplicate.
* The first duplicate GPU device will be added to the exception message.
*/
private List<GpuDevice> parseGpuDevicesFromUserDefinedValues(String devices)
private List<GpuDevice> parseGpuDevicesFromUserDefinedValues()
throws YarnException {
String devices = conf.get(
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
if (devices.trim().isEmpty()) {
throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
}
@ -244,6 +252,7 @@ public class GpuDiscoverer {
public synchronized void initialize(Configuration config)
throws YarnException {
this.conf = config;
if (IsAutoDiscoveryEnabled()) {
numOfErrorExecutionSinceLastSucceed = 0;
lookUpAutoDiscoveryBinary(config);
@ -259,6 +268,7 @@ public class GpuDiscoverer {
LOG.warn(msg);
}
}
}
private void lookUpAutoDiscoveryBinary(Configuration config)
throws YarnException {

View File

@ -40,6 +40,7 @@ import java.util.List;
import java.util.function.Consumer;
import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_GPU_ALLOWED_DEVICES;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer.DEFAULT_BINARY_NAME;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.not;
@ -49,6 +50,9 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
public class TestGpuDiscoverer {
private static final Logger LOG = LoggerFactory.getLogger(
@ -96,7 +100,7 @@ public class TestGpuDiscoverer {
/**
 * Builds the configuration used by tests that specify the allowed GPU
 * devices explicitly.
 *
 * <p>The configuration is created with {@code new Configuration(false)},
 * the allowed-devices key is set to the given value, and the result is
 * passed through {@code setupFakeBinary} before being returned.
 *
 * @param s value to store under {@code NM_GPU_ALLOWED_DEVICES}
 * @return the prepared configuration
 */
private Configuration createConfigWithAllowedDevices(String s) {
  Configuration conf = new Configuration(false);
  // A single assignment through the statically imported key is sufficient;
  // setting the same key/value a second time via the qualified name was
  // redundant.
  conf.set(NM_GPU_ALLOWED_DEVICES, s);
  setupFakeBinary(conf);
  return conf;
}
@ -495,4 +499,17 @@ public class TestGpuDiscoverer {
"executable in the default directories:"));
}
}
/**
 * Checks that {@code getGpuDeviceInformation()} is never invoked on the
 * discoverer when the allowed GPU devices are listed explicitly in the
 * configuration.
 *
 * @throws YarnException if initializing the discoverer or listing the
 *         usable GPUs fails
 */
@Test
public void testScriptNotCalled() throws YarnException {
  // Explicit device list, i.e. not the auto-discovery marker.
  final String explicitDevices = "0:1,2:3";
  Configuration manualConf = new Configuration();
  manualConf.set(NM_GPU_ALLOWED_DEVICES, explicitDevices);

  // Spy on a real discoverer so calls to it can be verified afterwards.
  GpuDiscoverer discovererSpy = spy(GpuDiscoverer.class);
  discovererSpy.initialize(manualConf);
  discovererSpy.getGpusUsableByYarn();

  verify(discovererSpy, never()).getGpuDeviceInformation();
}
}