YARN-9475. [YARN-9473] Create basic VE plugin. Contributed by Peter Bacsko.
This commit is contained in:
parent
e79a9c12c1
commit
8a95ea61e1
|
@ -0,0 +1,306 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.com.nec;
|
||||
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
import org.apache.hadoop.util.Shell.CommandExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.api.deviceplugin.Device;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.api.deviceplugin.DevicePlugin;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.api.deviceplugin.DevicePluginScheduler;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.api.deviceplugin.DeviceRegisterRequest;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.api.deviceplugin.DeviceRuntimeSpec;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.api.deviceplugin.YarnRuntimeType;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* A device framework plugin which supports NEC Vector Engine.
|
||||
*
|
||||
*/
|
||||
public class NECVEPlugin implements DevicePlugin, DevicePluginScheduler {
|
||||
private static final String HADOOP_COMMON_HOME = "HADOOP_COMMON_HOME";
|
||||
private static final String ENV_SCRIPT_PATH = "NEC_VE_GET_SCRIPT_PATH";
|
||||
private static final String ENV_SCRIPT_NAME = "NEC_VE_GET_SCRIPT_NAME";
|
||||
private static final String DEFAULT_SCRIPT_NAME = "nec-ve-get.py";
|
||||
private static final Logger LOG = LoggerFactory.getLogger(NECVEPlugin.class);
|
||||
private static final String[] DEFAULT_BINARY_SEARCH_DIRS = new String[]{
|
||||
"/usr/bin", "/bin", "/opt/nec/ve/bin"};
|
||||
|
||||
private String binaryPath;
|
||||
|
||||
private Function<String[], CommandExecutor>
|
||||
commandExecutorProvider = this::createCommandExecutor;
|
||||
|
||||
public NECVEPlugin() throws ResourceHandlerException {
|
||||
this(System::getenv, DEFAULT_BINARY_SEARCH_DIRS);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
NECVEPlugin(Function<String, String> envProvider, String[] scriptPaths)
|
||||
throws ResourceHandlerException {
|
||||
String binaryName = DEFAULT_SCRIPT_NAME;
|
||||
|
||||
String envScriptName = envProvider.apply(ENV_SCRIPT_NAME);
|
||||
if (envScriptName != null) {
|
||||
binaryName = envScriptName;
|
||||
}
|
||||
LOG.info("Use {} as script name.", envScriptName);
|
||||
|
||||
// Try to find the script based on an environment variable, if set
|
||||
boolean found = false;
|
||||
String envBinaryPath = envProvider.apply(ENV_SCRIPT_PATH);
|
||||
if (envBinaryPath != null) {
|
||||
this.binaryPath = getScriptFromEnvSetting(envBinaryPath);
|
||||
found = binaryPath != null;
|
||||
}
|
||||
|
||||
// Try $HADOOP_COMMON_HOME
|
||||
if (!found) {
|
||||
// print a warning only if the env variable was defined
|
||||
if (envBinaryPath != null) {
|
||||
LOG.warn("Script {} does not exist, falling back " +
|
||||
"to $HADOOP_COMMON_HOME/sbin/DevicePluginScript/", envBinaryPath);
|
||||
}
|
||||
|
||||
this.binaryPath = getScriptFromHadoopCommon(envProvider, binaryName);
|
||||
found = binaryPath != null;
|
||||
}
|
||||
|
||||
// Try the default search directories
|
||||
if (!found) {
|
||||
LOG.info("Script not found under" +
|
||||
" $HADOOP_COMMON_HOME/sbin/DevicePluginScript/," +
|
||||
" falling back to default search directories");
|
||||
|
||||
this.binaryPath = getScriptFromSearchDirs(binaryName, scriptPaths);
|
||||
found = binaryPath != null;
|
||||
}
|
||||
|
||||
// Script not found
|
||||
if (!found) {
|
||||
LOG.error("Script not found in "
|
||||
+ Arrays.toString(scriptPaths));
|
||||
throw new ResourceHandlerException(
|
||||
"No binary found for " + NECVEPlugin.class.getName());
|
||||
}
|
||||
}
|
||||
|
||||
public DeviceRegisterRequest getRegisterRequestInfo() {
|
||||
return DeviceRegisterRequest.Builder.newInstance()
|
||||
.setResourceName("nec.com/ve").build();
|
||||
}
|
||||
|
||||
public Set<Device> getDevices() {
|
||||
Set<Device> devices = null;
|
||||
|
||||
CommandExecutor executor =
|
||||
commandExecutorProvider.apply(new String[]{this.binaryPath});
|
||||
try {
|
||||
executor.execute();
|
||||
String output = executor.getOutput();
|
||||
devices = parseOutput(output);
|
||||
} catch (IOException e) {
|
||||
LOG.warn(e.toString());
|
||||
}
|
||||
return devices;
|
||||
}
|
||||
|
||||
public DeviceRuntimeSpec onDevicesAllocated(Set<Device> set,
|
||||
YarnRuntimeType yarnRuntimeType) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the output of the external Python script.
|
||||
*
|
||||
* Sample line:
|
||||
* id=0, dev=/dev/ve0, state=ONLINE, busId=0000:65:00.0, major=243, minor=0
|
||||
*/
|
||||
private Set<Device> parseOutput(String output) {
|
||||
Set<Device> devices = new HashSet<>();
|
||||
|
||||
LOG.info("Parsing output: {}", output);
|
||||
String[] lines = output.split("\n");
|
||||
for (String line : lines) {
|
||||
Device.Builder builder = Device.Builder.newInstance();
|
||||
|
||||
// map key --> builder calls
|
||||
Map<String, Consumer<String>> builderInvocations =
|
||||
getBuilderInvocationsMap(builder);
|
||||
|
||||
String[] keyValues = line.trim().split(",");
|
||||
for (String keyValue : keyValues) {
|
||||
String[] tokens = keyValue.trim().split("=");
|
||||
if (tokens.length != 2) {
|
||||
LOG.error("Unknown format of script output! Skipping this line");
|
||||
continue;
|
||||
}
|
||||
|
||||
final String key = tokens[0];
|
||||
final String value = tokens[1];
|
||||
|
||||
Consumer<String> builderInvocation = builderInvocations.get(key);
|
||||
if (builderInvocation != null) {
|
||||
builderInvocation.accept(value);
|
||||
} else {
|
||||
LOG.warn("Unknown key {}, ignored", key);
|
||||
}
|
||||
}// for key value pairs
|
||||
Device device = builder.build();
|
||||
if (device.isHealthy()) {
|
||||
devices.add(device);
|
||||
} else {
|
||||
LOG.warn("Skipping device {} because it's not healthy", device);
|
||||
}
|
||||
}
|
||||
|
||||
return devices;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onDevicesReleased(Set<Device> releasedDevices) {
|
||||
// nop
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Device> allocateDevices(Set<Device> availableDevices, int count,
|
||||
Map<String, String> env) {
|
||||
// Can consider topology, utilization.etc
|
||||
Set<Device> allocated = new HashSet<>();
|
||||
int number = 0;
|
||||
for (Device d : availableDevices) {
|
||||
allocated.add(d);
|
||||
number++;
|
||||
if (number == count) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return allocated;
|
||||
}
|
||||
|
||||
private CommandExecutor createCommandExecutor(String[] command) {
|
||||
return new Shell.ShellCommandExecutor(
|
||||
command);
|
||||
}
|
||||
|
||||
private String getScriptFromEnvSetting(String envBinaryPath) {
|
||||
LOG.info("Checking script path: {}", envBinaryPath);
|
||||
File f = new File(envBinaryPath);
|
||||
|
||||
if (!f.exists()) {
|
||||
LOG.warn("Script {} does not exist", envBinaryPath);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (f.isDirectory()) {
|
||||
LOG.warn("Specified path {} is a directory", envBinaryPath);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!FileUtil.canExecute(f)) {
|
||||
LOG.warn("Script {} is not executable", envBinaryPath);
|
||||
return null;
|
||||
}
|
||||
|
||||
LOG.info("Found script: {}", envBinaryPath);
|
||||
|
||||
return envBinaryPath;
|
||||
}
|
||||
|
||||
private String getScriptFromHadoopCommon(
|
||||
Function<String, String> envProvider, String binaryName) {
|
||||
String scriptPath = null;
|
||||
String hadoopCommon = envProvider.apply(HADOOP_COMMON_HOME);
|
||||
|
||||
if (hadoopCommon != null) {
|
||||
String targetPath = hadoopCommon +
|
||||
"/sbin/DevicePluginScript/" + binaryName;
|
||||
LOG.info("Checking script {}: ", targetPath);
|
||||
if (new File(targetPath).exists()) {
|
||||
LOG.info("Found script: {}", targetPath);
|
||||
scriptPath = targetPath;
|
||||
}
|
||||
} else {
|
||||
LOG.info("$HADOOP_COMMON_HOME is not set");
|
||||
}
|
||||
|
||||
return scriptPath;
|
||||
}
|
||||
|
||||
private String getScriptFromSearchDirs(String binaryName,
|
||||
String[] scriptPaths) {
|
||||
String scriptPath = null;
|
||||
|
||||
for (String dir : scriptPaths) {
|
||||
File f = new File(dir, binaryName);
|
||||
if (f.exists()) {
|
||||
LOG.info("Found script: {}", dir);
|
||||
scriptPath = f.getAbsolutePath();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return scriptPath;
|
||||
}
|
||||
|
||||
private Map<String, Consumer<String>> getBuilderInvocationsMap(
|
||||
Device.Builder builder) {
|
||||
Map<String, Consumer<String>> builderInvocations = new HashMap<>();
|
||||
builderInvocations.put("id", v -> builder.setId(Integer.parseInt(v)));
|
||||
builderInvocations.put("dev", v -> builder.setDevPath(v));
|
||||
builderInvocations.put("state", v -> {
|
||||
if (v.equals("ONLINE")) {
|
||||
builder.setHealthy(true);
|
||||
}
|
||||
builder.setStatus(v);
|
||||
});
|
||||
builderInvocations.put("busId", v -> builder.setBusID(v));
|
||||
builderInvocations.put("major",
|
||||
v -> builder.setMajorNumber(Integer.parseInt(v)));
|
||||
builderInvocations.put("minor",
|
||||
v -> builder.setMinorNumber(Integer.parseInt(v)));
|
||||
|
||||
return builderInvocations;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
void setCommandExecutorProvider(
|
||||
Function<String[], CommandExecutor> provider) {
|
||||
this.commandExecutorProvider = provider;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
String getBinaryPath() {
|
||||
return binaryPath;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.com.nec;
|
Loading…
Reference in New Issue