YARN-9118. Handle exceptions with parsing user defined GPU devices in GpuDiscoverer. Contributed by Szilard Nemeth.
(cherry picked from commit 95fbbfed75
)
This commit is contained in:
parent
040d475030
commit
d6377c8b68
|
@ -65,11 +65,10 @@ public class GpuResourceHandlerImpl implements ResourceHandler {
|
|||
throws ResourceHandlerException {
|
||||
List<GpuDevice> usableGpus;
|
||||
try {
|
||||
usableGpus = GpuDiscoverer.getInstance()
|
||||
.getGpusUsableByYarn();
|
||||
usableGpus = GpuDiscoverer.getInstance().getGpusUsableByYarn();
|
||||
if (usableGpus == null || usableGpus.isEmpty()) {
|
||||
String message = "GPU is enabled on the NodeManager, but couldn't find "
|
||||
+ "any usable GPU devices, please double check configuration.";
|
||||
+ "any usable GPU devices, please double check configuration!";
|
||||
LOG.error(message);
|
||||
throw new ResourceHandlerException(message);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
||||
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
|
||||
/**
|
||||
* This exception is to be thrown when allowed GPU device specification
|
||||
* is empty or invalid.
|
||||
*/
|
||||
public final class GpuDeviceSpecificationException extends YarnException {
|
||||
private static final String VALID_FORMAT_MESSAGE = "The valid format " +
|
||||
"should be: index:minor_number";
|
||||
|
||||
private GpuDeviceSpecificationException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
private GpuDeviceSpecificationException(String message, Exception cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
|
||||
public static GpuDeviceSpecificationException createWithEmptyValueSpecified() {
|
||||
return new GpuDeviceSpecificationException(
|
||||
YarnConfiguration.NM_GPU_ALLOWED_DEVICES +
|
||||
" is set to an empty value! Please specify " +
|
||||
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES +
|
||||
" to enable auto-discovery or " +
|
||||
"please enter the GPU device IDs manually! " +
|
||||
VALID_FORMAT_MESSAGE);
|
||||
}
|
||||
|
||||
public static GpuDeviceSpecificationException createWithWrongValueSpecified(
|
||||
String device, String configValue, Exception cause) {
|
||||
final String message = createIllegalFormatMessage(device, configValue);
|
||||
return new GpuDeviceSpecificationException(message, cause);
|
||||
}
|
||||
|
||||
public static GpuDeviceSpecificationException createWithWrongValueSpecified(
|
||||
String device, String configValue) {
|
||||
final String message = createIllegalFormatMessage(device, configValue);
|
||||
return new GpuDeviceSpecificationException(message);
|
||||
}
|
||||
|
||||
public static GpuDeviceSpecificationException createWithDuplicateValueSpecified(
|
||||
String device, String configValue) {
|
||||
final String message = createDuplicateFormatMessage(device, configValue);
|
||||
return new GpuDeviceSpecificationException(message);
|
||||
}
|
||||
|
||||
private static String createIllegalFormatMessage(String device,
|
||||
String configValue) {
|
||||
return String.format("Illegal format of individual GPU device: %s, " +
|
||||
"the whole config value was: '%s'! " + VALID_FORMAT_MESSAGE,
|
||||
device, configValue);
|
||||
}
|
||||
|
||||
private static String createDuplicateFormatMessage(String device,
|
||||
String configValue) {
|
||||
return String.format("GPU device %s" +
|
||||
" has a duplicate definition! " +
|
||||
"Please double-check the configuration " +
|
||||
YarnConfiguration.NM_GPU_ALLOWED_DEVICES +
|
||||
"! Current value of the configuration is: %s",
|
||||
device, configValue);
|
||||
}
|
||||
}
|
|
@ -19,8 +19,8 @@
|
|||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -47,7 +47,7 @@ public class GpuDiscoverer {
|
|||
public static final Logger LOG = LoggerFactory.getLogger(
|
||||
GpuDiscoverer.class);
|
||||
@VisibleForTesting
|
||||
protected static final String DEFAULT_BINARY_NAME = "nvidia-smi";
|
||||
static final String DEFAULT_BINARY_NAME = "nvidia-smi";
|
||||
|
||||
// When executable path not set, try to search default dirs
|
||||
// By default search /usr/bin, /bin, and /usr/local/nvidia/bin (when
|
||||
|
@ -70,7 +70,7 @@ public class GpuDiscoverer {
|
|||
private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
|
||||
|
||||
private int numOfErrorExecutionSinceLastSucceed = 0;
|
||||
GpuDeviceInformation lastDiscoveredGpuInformation = null;
|
||||
private GpuDeviceInformation lastDiscoveredGpuInformation = null;
|
||||
|
||||
private void validateConfOrThrowException() throws YarnException {
|
||||
if (conf == null) {
|
||||
|
@ -89,7 +89,7 @@ public class GpuDiscoverer {
|
|||
* @return GpuDeviceInformation
|
||||
* @throws YarnException when any error happens
|
||||
*/
|
||||
public synchronized GpuDeviceInformation getGpuDeviceInformation()
|
||||
synchronized GpuDeviceInformation getGpuDeviceInformation()
|
||||
throws YarnException {
|
||||
validateConfOrThrowException();
|
||||
|
||||
|
@ -112,10 +112,9 @@ public class GpuDiscoverer {
|
|||
try {
|
||||
output = Shell.execCommand(environment,
|
||||
new String[] { pathOfGpuBinary, "-x", "-q" }, MAX_EXEC_TIMEOUT_MS);
|
||||
GpuDeviceInformation info = parser.parseXml(output);
|
||||
lastDiscoveredGpuInformation = parser.parseXml(output);
|
||||
numOfErrorExecutionSinceLastSucceed = 0;
|
||||
lastDiscoveredGpuInformation = info;
|
||||
return info;
|
||||
return lastDiscoveredGpuInformation;
|
||||
} catch (IOException e) {
|
||||
numOfErrorExecutionSinceLastSucceed++;
|
||||
String msg =
|
||||
|
@ -149,52 +148,91 @@ public class GpuDiscoverer {
|
|||
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
|
||||
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
|
||||
|
||||
List<GpuDevice> gpuDevices = new ArrayList<>();
|
||||
|
||||
if (allowedDevicesStr.equals(
|
||||
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
|
||||
// Get gpu device information from system.
|
||||
if (null == lastDiscoveredGpuInformation) {
|
||||
String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
|
||||
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
|
||||
+ ", however automatically discovering "
|
||||
+ "GPU information failed, please check NodeManager log for more"
|
||||
+ " details, as an alternative, admin can specify "
|
||||
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES
|
||||
+ " manually to enable GPU isolation.";
|
||||
LOG.error(msg);
|
||||
throw new YarnException(msg);
|
||||
}
|
||||
|
||||
if (lastDiscoveredGpuInformation.getGpus() != null) {
|
||||
for (int i = 0; i < lastDiscoveredGpuInformation.getGpus().size();
|
||||
i++) {
|
||||
List<PerGpuDeviceInformation> gpuInfos =
|
||||
lastDiscoveredGpuInformation.getGpus();
|
||||
gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
|
||||
}
|
||||
}
|
||||
} else{
|
||||
for (String s : allowedDevicesStr.split(",")) {
|
||||
if (s.trim().length() > 0) {
|
||||
String[] kv = s.trim().split(":");
|
||||
if (kv.length != 2) {
|
||||
throw new YarnException(
|
||||
"Illegal format, it should be index:minor_number format, now it="
|
||||
+ s);
|
||||
}
|
||||
|
||||
gpuDevices.add(
|
||||
new GpuDevice(Integer.parseInt(kv[0]), Integer.parseInt(kv[1])));
|
||||
}
|
||||
}
|
||||
LOG.info("Allowed GPU devices:" + gpuDevices);
|
||||
return parseGpuDevicesFromAutoDiscoveredGpuInfo();
|
||||
} else {
|
||||
return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr);
|
||||
}
|
||||
}
|
||||
|
||||
private List<GpuDevice> parseGpuDevicesFromAutoDiscoveredGpuInfo()
|
||||
throws YarnException {
|
||||
if (lastDiscoveredGpuInformation == null) {
|
||||
String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
|
||||
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
|
||||
+ ", however automatically discovering "
|
||||
+ "GPU information failed, please check NodeManager log for more"
|
||||
+ " details, as an alternative, admin can specify "
|
||||
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES
|
||||
+ " manually to enable GPU isolation.";
|
||||
LOG.error(msg);
|
||||
throw new YarnException(msg);
|
||||
}
|
||||
|
||||
List<GpuDevice> gpuDevices = new ArrayList<>();
|
||||
if (lastDiscoveredGpuInformation.getGpus() != null) {
|
||||
int numberOfGpus = lastDiscoveredGpuInformation.getGpus().size();
|
||||
LOG.debug("Found {} GPU devices", numberOfGpus);
|
||||
for (int i = 0; i < numberOfGpus; i++) {
|
||||
List<PerGpuDeviceInformation> gpuInfos =
|
||||
lastDiscoveredGpuInformation.getGpus();
|
||||
gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
|
||||
}
|
||||
}
|
||||
return gpuDevices;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param devices allowed devices coming from the config.
|
||||
* Individual devices should be separated by commas.
|
||||
* <br>The format of individual devices should be:
|
||||
* <index:><minorNumber>
|
||||
* @return List of GpuDevices
|
||||
* @throws YarnException when a GPU device is defined as a duplicate.
|
||||
* The first duplicate GPU device will be added to the exception message.
|
||||
*/
|
||||
private List<GpuDevice> parseGpuDevicesFromUserDefinedValues(String devices)
|
||||
throws YarnException {
|
||||
if (devices.trim().isEmpty()) {
|
||||
throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
|
||||
}
|
||||
List<GpuDevice> gpuDevices = Lists.newArrayList();
|
||||
for (String device : devices.split(",")) {
|
||||
if (device.trim().length() > 0) {
|
||||
String[] splitByColon = device.trim().split(":");
|
||||
if (splitByColon.length != 2) {
|
||||
throw GpuDeviceSpecificationException.
|
||||
createWithWrongValueSpecified(device, devices);
|
||||
}
|
||||
|
||||
GpuDevice gpuDevice = parseGpuDevice(device, splitByColon, devices);
|
||||
if (!gpuDevices.contains(gpuDevice)) {
|
||||
gpuDevices.add(gpuDevice);
|
||||
} else {
|
||||
throw GpuDeviceSpecificationException
|
||||
.createWithDuplicateValueSpecified(device, devices);
|
||||
}
|
||||
}
|
||||
}
|
||||
LOG.info("Allowed GPU devices:" + gpuDevices);
|
||||
|
||||
return gpuDevices;
|
||||
}
|
||||
|
||||
public synchronized void initialize(Configuration conf) throws YarnException {
|
||||
private GpuDevice parseGpuDevice(String device, String[] splitByColon,
|
||||
String allowedDevicesStr) throws YarnException {
|
||||
try {
|
||||
int index = Integer.parseInt(splitByColon[0]);
|
||||
int minorNumber = Integer.parseInt(splitByColon[1]);
|
||||
return new GpuDevice(index, minorNumber);
|
||||
} catch (NumberFormatException e) {
|
||||
throw GpuDeviceSpecificationException.
|
||||
createWithWrongValueSpecified(device, allowedDevicesStr, e);
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void initialize(Configuration conf) {
|
||||
this.conf = conf;
|
||||
numOfErrorExecutionSinceLastSucceed = 0;
|
||||
String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
|
||||
|
@ -203,9 +241,7 @@ public class GpuDiscoverer {
|
|||
pathToExecutable = DEFAULT_BINARY_NAME;
|
||||
}
|
||||
|
||||
// Validate file existence
|
||||
File binaryPath = new File(pathToExecutable);
|
||||
|
||||
if (!binaryPath.exists()) {
|
||||
// When binary not exist, use default setting.
|
||||
boolean found = false;
|
||||
|
@ -249,12 +285,12 @@ public class GpuDiscoverer {
|
|||
}
|
||||
|
||||
@VisibleForTesting
|
||||
protected Map<String, String> getEnvironmentToRunCommand() {
|
||||
Map<String, String> getEnvironmentToRunCommand() {
|
||||
return environment;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
protected String getPathOfGpuBinary() {
|
||||
String getPathOfGpuBinary() {
|
||||
return pathOfGpuBinary;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,11 +40,11 @@ public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
|
|||
public void updateConfiguredResource(Resource res) throws YarnException {
|
||||
LOG.info("Initializing configured GPU resources for the NodeManager.");
|
||||
|
||||
List<GpuDevice> usableGpus =
|
||||
GpuDiscoverer.getInstance().getGpusUsableByYarn();
|
||||
if (null == usableGpus || usableGpus.isEmpty()) {
|
||||
String message = "GPU is enabled, but couldn't find any usable GPUs on the "
|
||||
+ "NodeManager.";
|
||||
List<GpuDevice> usableGpus = GpuDiscoverer.getInstance()
|
||||
.getGpusUsableByYarn();
|
||||
if (usableGpus == null || usableGpus.isEmpty()) {
|
||||
String message = "GPU is enabled, " +
|
||||
"but couldn't find any usable GPUs on the NodeManager!";
|
||||
LOG.error(message);
|
||||
// No gpu can be used by YARN.
|
||||
throw new YarnException(message);
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Package for GPU support classes.
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
|
@ -23,17 +23,26 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Assume;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.ExpectedException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TestGpuDiscoverer {
|
||||
@Rule
|
||||
public ExpectedException exception = ExpectedException.none();
|
||||
|
||||
private String getTestParentFolder() {
|
||||
File f = new File("target/temp/" + TestGpuDiscoverer.class.getName());
|
||||
return f.getAbsolutePath();
|
||||
|
@ -51,6 +60,12 @@ public class TestGpuDiscoverer {
|
|||
f.mkdirs();
|
||||
}
|
||||
|
||||
private Configuration createConfigWithAllowedDevices(String s) {
|
||||
Configuration conf = new Configuration(false);
|
||||
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
|
||||
return conf;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLinuxGpuResourceDiscoverPluginConfig() throws Exception {
|
||||
// Only run this on demand.
|
||||
|
@ -61,10 +76,10 @@ public class TestGpuDiscoverer {
|
|||
Configuration conf = new Configuration(false);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
|
||||
assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
|
||||
plugin.getPathOfGpuBinary());
|
||||
Assert.assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
|
||||
Assert.assertTrue(
|
||||
assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
|
||||
assertTrue(
|
||||
plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
|
||||
|
||||
// test case 2, check mandatory set path.
|
||||
|
@ -74,18 +89,18 @@ public class TestGpuDiscoverer {
|
|||
conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
|
||||
plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
Assert.assertEquals(fakeBinary.getAbsolutePath(),
|
||||
assertEquals(fakeBinary.getAbsolutePath(),
|
||||
plugin.getPathOfGpuBinary());
|
||||
Assert.assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
|
||||
assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
|
||||
|
||||
// test case 3, check mandatory set path, but binary doesn't exist so default
|
||||
// path will be used.
|
||||
fakeBinary.delete();
|
||||
plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
|
||||
assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
|
||||
plugin.getPathOfGpuBinary());
|
||||
Assert.assertTrue(
|
||||
assertTrue(
|
||||
plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
|
||||
}
|
||||
|
||||
|
@ -100,42 +115,165 @@ public class TestGpuDiscoverer {
|
|||
plugin.initialize(conf);
|
||||
GpuDeviceInformation info = plugin.getGpuDeviceInformation();
|
||||
|
||||
Assert.assertTrue(info.getGpus().size() > 0);
|
||||
Assert.assertEquals(plugin.getGpusUsableByYarn().size(),
|
||||
assertTrue(info.getGpus().size() > 0);
|
||||
assertEquals(plugin.getGpusUsableByYarn().size(),
|
||||
info.getGpus().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getNumberOfUsableGpusFromConfig() throws YarnException {
|
||||
Configuration conf = new Configuration(false);
|
||||
public void testGetNumberOfUsableGpusFromConfigSingleDevice()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("1:2");
|
||||
|
||||
// Illegal format
|
||||
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3");
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
try {
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
Assert.fail("Illegal format, should fail.");
|
||||
} catch (YarnException e) {
|
||||
// Expected
|
||||
}
|
||||
plugin.initialize(conf);
|
||||
List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
|
||||
assertEquals(1, usableGpuDevices.size());
|
||||
|
||||
// Valid format
|
||||
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3:4");
|
||||
plugin = new GpuDiscoverer();
|
||||
assertEquals(1, usableGpuDevices.get(0).getIndex());
|
||||
assertEquals(2, usableGpuDevices.get(0).getMinorNumber());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigIllegalFormat()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,3");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfig() throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,3:4");
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
|
||||
List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
|
||||
Assert.assertEquals(4, usableGpuDevices.size());
|
||||
assertEquals(4, usableGpuDevices.size());
|
||||
|
||||
Assert.assertTrue(0 == usableGpuDevices.get(0).getIndex());
|
||||
Assert.assertTrue(1 == usableGpuDevices.get(1).getIndex());
|
||||
Assert.assertTrue(2 == usableGpuDevices.get(2).getIndex());
|
||||
Assert.assertTrue(3 == usableGpuDevices.get(3).getIndex());
|
||||
assertEquals(0, usableGpuDevices.get(0).getIndex());
|
||||
assertEquals(0, usableGpuDevices.get(0).getMinorNumber());
|
||||
|
||||
Assert.assertTrue(0 == usableGpuDevices.get(0).getMinorNumber());
|
||||
Assert.assertTrue(1 == usableGpuDevices.get(1).getMinorNumber());
|
||||
Assert.assertTrue(2 == usableGpuDevices.get(2).getMinorNumber());
|
||||
Assert.assertTrue(4 == usableGpuDevices.get(3).getMinorNumber());
|
||||
assertEquals(1, usableGpuDevices.get(1).getIndex());
|
||||
assertEquals(1, usableGpuDevices.get(1).getMinorNumber());
|
||||
|
||||
assertEquals(2, usableGpuDevices.get(2).getIndex());
|
||||
assertEquals(2, usableGpuDevices.get(2).getMinorNumber());
|
||||
|
||||
assertEquals(3, usableGpuDevices.get(3).getIndex());
|
||||
assertEquals(4, usableGpuDevices.get(3).getMinorNumber());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigDuplicateValues()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,1:1");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigDuplicateValues2()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,1:1,2:2");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigIncludingSpaces()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0 : 0,1 : 1");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigIncludingGibberish()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0:@$1,1:1");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigIncludingLetters()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("x:0, 1:y");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigWithoutIndexNumber()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices(":0, :1");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigEmptyString()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigValueWithoutComma()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0:0 0:1");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigValueWithoutComma2()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0.1 0.2");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNumberOfUsableGpusFromConfigValueWithoutColonSeparator()
|
||||
throws YarnException {
|
||||
Configuration conf = createConfigWithAllowedDevices("0.1,0.2");
|
||||
|
||||
exception.expect(GpuDeviceSpecificationException.class);
|
||||
GpuDiscoverer plugin = new GpuDiscoverer();
|
||||
plugin.initialize(conf);
|
||||
plugin.getGpusUsableByYarn();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue