YARN-6545. Followup fix for YARN-6405. Contributed by Jian He

This commit is contained in:
Billie Rinaldi 2017-05-09 09:26:00 -07:00 committed by Jian He
parent 399525c2e0
commit ce05c6e981
20 changed files with 192 additions and 140 deletions

View File

@ -36,6 +36,10 @@ public interface ServiceApiConstants {
String SERVICE_NAME_LC = $("SERVICE_NAME.lc"); String SERVICE_NAME_LC = $("SERVICE_NAME.lc");
String USER = $("USER");
String DOMAIN = $("DOMAIN");
// Constants for component // Constants for component
String COMPONENT_NAME = $("COMPONENT_NAME"); String COMPONENT_NAME = $("COMPONENT_NAME");
@ -47,4 +51,19 @@ public interface ServiceApiConstants {
String COMPONENT_ID = $("COMPONENT_ID"); String COMPONENT_ID = $("COMPONENT_ID");
String CONTAINER_ID = $("CONTAINER_ID"); String CONTAINER_ID = $("CONTAINER_ID");
// Constants for default cluster ZK
String CLUSTER_ZK_QUORUM = $("CLUSTER_ZK_QUORUM");
// URI for the default cluster fs
String CLUSTER_FS_URI = $("CLUSTER_FS_URI");
// the host component of the cluster fs URI
String CLUSTER_FS_HOST = $("CLUSTER_FS_HOST");
// Path in zookeeper for a specific service
String SERVICE_ZK_PATH = $("SERVICE_ZK_PATH");
// Constants for service specific hdfs dir
String SERVICE_HDFS_DIR = $("SERVICE_HDFS_DIR");
} }

View File

@ -105,10 +105,7 @@ public class Configuration implements Serializable {
} }
public long getPropertyLong(String name, long defaultValue) { public long getPropertyLong(String name, long defaultValue) {
if (name == null) { String value = getProperty(name);
return defaultValue;
}
String value = properties.get(name.trim());
if (StringUtils.isEmpty(value)) { if (StringUtils.isEmpty(value)) {
return defaultValue; return defaultValue;
} }
@ -116,10 +113,7 @@ public class Configuration implements Serializable {
} }
public int getPropertyInt(String name, int defaultValue) { public int getPropertyInt(String name, int defaultValue) {
if (name == null) { String value = getProperty(name);
return defaultValue;
}
String value = properties.get(name.trim());
if (StringUtils.isEmpty(value)) { if (StringUtils.isEmpty(value)) {
return defaultValue; return defaultValue;
} }
@ -127,10 +121,7 @@ public class Configuration implements Serializable {
} }
public boolean getPropertyBool(String name, boolean defaultValue) { public boolean getPropertyBool(String name, boolean defaultValue) {
if (name == null) { String value = getProperty(name);
return defaultValue;
}
String value = properties.get(name.trim());
if (StringUtils.isEmpty(value)) { if (StringUtils.isEmpty(value)) {
return defaultValue; return defaultValue;
} }
@ -138,10 +129,11 @@ public class Configuration implements Serializable {
} }
public String getProperty(String name, String defaultValue) { public String getProperty(String name, String defaultValue) {
if (name == null) { String value = getProperty(name);
if (StringUtils.isEmpty(value)) {
return defaultValue; return defaultValue;
} }
return properties.get(name.trim()); return value;
} }
public void setProperty(String name, String value) { public void setProperty(String name, String value) {
@ -149,16 +141,10 @@ public class Configuration implements Serializable {
} }
public String getProperty(String name) { public String getProperty(String name) {
if (name == null) {
return null;
}
return properties.get(name.trim()); return properties.get(name.trim());
} }
public String getEnv(String name) { public String getEnv(String name) {
if (name == null) {
return null;
}
return env.get(name.trim()); return env.get(name.trim());
} }

View File

@ -1112,18 +1112,20 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe
"not a directory", folder); "not a directory", folder);
} }
for (File f : files) { if (files != null) {
srcFile = new Path(f.toURI()); for (File f : files) {
srcFile = new Path(f.toURI());
Path fileInFs = new Path(pkgPath, srcFile.getName()); Path fileInFs = new Path(pkgPath, srcFile.getName());
log.info("Installing file {} at {} and overwrite is {}.", log.info("Installing file {} at {} and overwrite is {}.",
srcFile, fileInFs, resourceInfo.overwrite); srcFile, fileInFs, resourceInfo.overwrite);
require(!(sfs.exists(fileInFs) && !resourceInfo.overwrite), require(!(sfs.exists(fileInFs) && !resourceInfo.overwrite),
"File exists at %s. Use --overwrite to overwrite.", fileInFs.toUri()); "File exists at %s. Use --overwrite to overwrite.", fileInFs.toUri());
sfs.copyFromLocalFile(false, resourceInfo.overwrite, srcFile, fileInFs); sfs.copyFromLocalFile(false, resourceInfo.overwrite, srcFile, fileInFs);
sfs.setPermission(fileInFs, sfs.setPermission(fileInFs,
new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE)); new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE));
}
} }
return EXIT_SUCCESS; return EXIT_SUCCESS;

View File

@ -597,53 +597,6 @@ public class CoreFileSystem {
providerResources.put(SliderKeys.SLIDER_DEPENDENCY_LOCALIZED_DIR_LINK, lc); providerResources.put(SliderKeys.SLIDER_DEPENDENCY_LOCALIZED_DIR_LINK, lc);
} }
/**
* Copy local file(s) to destination HDFS directory. If {@code localPath} is a
* local directory then all files matching the {@code filenameFilter}
* (optional) are copied, otherwise {@code filenameFilter} is ignored.
*
* @param localPath
* a local file or directory path
* @param filenameFilter
* if {@code localPath} is a directory then filenameFilter is used as
* a filter (if specified)
* @param destDir
* the destination HDFS directory where the file(s) should be copied
* @param fp
* file permissions of all the directories and files that will be
* created in this api
* @throws IOException
*/
public void copyLocalFilesToHdfs(File localPath,
FilenameFilter filenameFilter, Path destDir, FsPermission fp)
throws IOException {
if (localPath == null || destDir == null) {
throw new IOException("Either localPath or destDir is null");
}
fileSystem.getConf().set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY,
"000");
fileSystem.mkdirs(destDir, fp);
if (localPath.isDirectory()) {
// copy all local files under localPath to destDir (honoring filename
// filter if provided
File[] localFiles = localPath.listFiles(filenameFilter);
Path[] localFilePaths = new Path[localFiles.length];
int i = 0;
for (File localFile : localFiles) {
localFilePaths[i++] = new Path(localFile.getPath());
}
log.info("Copying {} files from {} to {}", i, localPath.toURI(),
destDir.toUri());
fileSystem.copyFromLocalFile(false, true, localFilePaths, destDir);
} else {
log.info("Copying file {} to {}", localPath.toURI(), destDir.toUri());
fileSystem.copyFromLocalFile(false, true, new Path(localPath.getPath()),
destDir);
}
// set permissions for all the files created in the destDir
fileSystem.setPermission(destDir, fp);
}
public void copyLocalFileToHdfs(File localPath, public void copyLocalFileToHdfs(File localPath,
Path destPath, FsPermission fp) Path destPath, FsPermission fp)
throws IOException { throws IOException {

View File

@ -25,7 +25,7 @@ public enum ConfigFormat {
JSON("json"), JSON("json"),
PROPERTIES("properties"), PROPERTIES("properties"),
XML("xml"), XML("xml"),
HADOOP_XML("hadoop-xml"), HADOOP_XML("hadoop_xml"),
ENV("env"), ENV("env"),
TEMPLATE("template"), TEMPLATE("template"),
YAML("yaml"), YAML("yaml"),

View File

@ -52,7 +52,7 @@ public class ZKIntegration implements Watcher, Closeable {
public static final String SVC_SLIDER = "/" + ZK_SERVICES + "/" + ZK_SLIDER; public static final String SVC_SLIDER = "/" + ZK_SERVICES + "/" + ZK_SLIDER;
public static final String SVC_SLIDER_USERS = SVC_SLIDER + "/" + ZK_USERS; public static final String SVC_SLIDER_USERS = SVC_SLIDER + "/" + ZK_USERS;
public static final List<String> ZK_USERS_PATH_LIST = new ArrayList<String>(); private static final List<String> ZK_USERS_PATH_LIST = new ArrayList<String>();
static { static {
ZK_USERS_PATH_LIST.add(ZK_SERVICES); ZK_USERS_PATH_LIST.add(ZK_SERVICES);
ZK_USERS_PATH_LIST.add(ZK_SLIDER); ZK_USERS_PATH_LIST.add(ZK_SLIDER);

View File

@ -20,10 +20,8 @@ package org.apache.slider.providers;
import org.apache.slider.api.ResourceKeys; import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.resource.Component; import org.apache.slider.api.resource.Component;
import org.apache.slider.server.appmaster.state.AppState; import org.apache.slider.server.appmaster.state.RoleInstance;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue; import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
@ -44,8 +42,7 @@ public final class ProviderRole {
public final String labelExpression; public final String labelExpression;
public final Component component; public final Component component;
public AtomicLong componentIdCounter = null; public AtomicLong componentIdCounter = null;
public AppState appState; public Queue<RoleInstance> failedInstances = new ConcurrentLinkedQueue<>();
public Queue<String> failedInstanceName = new ConcurrentLinkedQueue<String>();
public ProviderRole(String name, int id) { public ProviderRole(String name, int id) {
this(name, this(name,
id, id,
@ -78,7 +75,7 @@ public final class ProviderRole {
nodeFailureThreshold, nodeFailureThreshold,
placementTimeoutSeconds, placementTimeoutSeconds,
labelExpression, labelExpression,
new Component().name(name).numberOfContainers(0L), null); new Component().name(name).numberOfContainers(0L));
} }
/** /**
@ -88,13 +85,13 @@ public final class ProviderRole {
* @param id ID. This becomes the YARN priority * @param id ID. This becomes the YARN priority
* @param policy placement policy * @param policy placement policy
* @param nodeFailureThreshold threshold for node failures (within a reset interval) * @param nodeFailureThreshold threshold for node failures (within a reset interval)
* after which a node failure is considered an app failure * after which a node failure is considered an app failure
* @param placementTimeoutSeconds for lax placement, timeout in seconds before * @param placementTimeoutSeconds for lax placement, timeout in seconds before
* @param labelExpression label expression for requests; may be null * @param labelExpression label expression for requests; may be null
*/ */
public ProviderRole(String name, String group, int id, int policy, public ProviderRole(String name, String group, int id, int policy,
int nodeFailureThreshold, long placementTimeoutSeconds, int nodeFailureThreshold, long placementTimeoutSeconds,
String labelExpression, Component component, AppState state) { String labelExpression, Component component) {
this.name = name; this.name = name;
if (group == null) { if (group == null) {
this.group = name; this.group = name;
@ -110,7 +107,6 @@ public final class ProviderRole {
if(component.getUniqueComponentSupport()) { if(component.getUniqueComponentSupport()) {
componentIdCounter = new AtomicLong(0); componentIdCounter = new AtomicLong(0);
} }
this.appState = state;
} }

View File

@ -32,7 +32,6 @@ import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.slider.api.ClusterNode; import org.apache.slider.api.ClusterNode;
import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.ResourceKeys; import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.RoleKeys; import org.apache.slider.api.RoleKeys;
import org.apache.slider.api.resource.Application; import org.apache.slider.api.resource.Application;
@ -59,7 +58,6 @@ import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.net.URI;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -271,8 +269,8 @@ public class ProviderUtils implements RoleKeys, SliderKeys {
// 2. Add the config file to localResource // 2. Add the config file to localResource
public synchronized void createConfigFileAndAddLocalResource( public synchronized void createConfigFileAndAddLocalResource(
ContainerLauncher launcher, SliderFileSystem fs, Component component, ContainerLauncher launcher, SliderFileSystem fs, Component component,
Map<String, String> tokensForSubstitution, RoleInstance roleInstance) Map<String, String> tokensForSubstitution, RoleInstance roleInstance,
throws IOException { StateAccessForProviders appState) throws IOException {
Path compDir = Path compDir =
new Path(new Path(fs.getAppDir(), "components"), component.getName()); new Path(new Path(fs.getAppDir(), "components"), component.getName());
Path compInstanceDir = Path compInstanceDir =
@ -315,12 +313,12 @@ public class ProviderUtils implements RoleKeys, SliderKeys {
case HADOOP_XML: case HADOOP_XML:
// Hadoop_xml_template // Hadoop_xml_template
resolveHadoopXmlTemplateAndSaveOnHdfs(fs.getFileSystem(), resolveHadoopXmlTemplateAndSaveOnHdfs(fs.getFileSystem(),
tokensForSubstitution, configFile, remoteFile, roleInstance); tokensForSubstitution, configFile, remoteFile, appState);
break; break;
case TEMPLATE: case TEMPLATE:
// plain-template // plain-template
resolvePlainTemplateAndSaveOnHdfs(fs.getFileSystem(), resolvePlainTemplateAndSaveOnHdfs(fs.getFileSystem(),
tokensForSubstitution, configFile, remoteFile, roleInstance); tokensForSubstitution, configFile, remoteFile, appState);
break; break;
default: default:
log.info("Not supporting loading src_file for " + configFile); log.info("Not supporting loading src_file for " + configFile);
@ -383,11 +381,11 @@ public class ProviderUtils implements RoleKeys, SliderKeys {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private void resolveHadoopXmlTemplateAndSaveOnHdfs(FileSystem fs, private void resolveHadoopXmlTemplateAndSaveOnHdfs(FileSystem fs,
Map<String, String> tokensForSubstitution, ConfigFile configFile, Map<String, String> tokensForSubstitution, ConfigFile configFile,
Path remoteFile, RoleInstance roleInstance) throws IOException { Path remoteFile, StateAccessForProviders appState) throws IOException {
Map<String, String> conf; Map<String, String> conf;
try { try {
conf = (Map<String, String>) roleInstance.providerRole. conf = (Map<String, String>) appState.getConfigFileCache()
appState.configFileCache.get(configFile); .get(configFile);
} catch (ExecutionException e) { } catch (ExecutionException e) {
log.info("Failed to load config file: " + configFile, e); log.info("Failed to load config file: " + configFile, e);
return; return;
@ -426,17 +424,16 @@ public class ProviderUtils implements RoleKeys, SliderKeys {
// 3) save on hdfs // 3) save on hdfs
private void resolvePlainTemplateAndSaveOnHdfs(FileSystem fs, private void resolvePlainTemplateAndSaveOnHdfs(FileSystem fs,
Map<String, String> tokensForSubstitution, ConfigFile configFile, Map<String, String> tokensForSubstitution, ConfigFile configFile,
Path remoteFile, RoleInstance roleInstance) { Path remoteFile, StateAccessForProviders appState) {
String content; String content;
try { try {
content = (String) roleInstance.providerRole.appState.configFileCache content = (String) appState.getConfigFileCache().get(configFile);
.get(configFile);
} catch (ExecutionException e) { } catch (ExecutionException e) {
log.info("Failed to load config file: " + configFile, e); log.info("Failed to load config file: " + configFile, e);
return; return;
} }
// substitute tokens // substitute tokens
substituteStrWithTokens(content, tokensForSubstitution); content = substituteStrWithTokens(content, tokensForSubstitution);
try (OutputStream output = fs.create(remoteFile)) { try (OutputStream output = fs.create(remoteFile)) {
org.apache.commons.io.IOUtils.write(content, output); org.apache.commons.io.IOUtils.write(content, output);
@ -446,25 +443,13 @@ public class ProviderUtils implements RoleKeys, SliderKeys {
} }
/** /**
* Get initial token map to be substituted into config values. * Get initial component token map to be substituted into config values.
* @param appConf app configurations * @param roleInstance role instance
* @param clusterName app name
* @return tokens to replace * @return tokens to replace
*/ */
public Map<String, String> getStandardTokenMap(Configuration appConf, public Map<String, String> initCompTokensForSubstitute(
RoleInstance roleInstance, String clusterName) { RoleInstance roleInstance) {
Map<String, String> tokens = new HashMap<>(); Map<String, String> tokens = new HashMap<>();
String nnuri = appConf.getProperty("fs.defaultFS");
if (nnuri != null && !nnuri.isEmpty()) {
tokens.put("${NN_URI}", nnuri);
tokens.put("${NN_HOST}", URI.create(nnuri).getHost());
}
tokens.put("${ZK_HOST}", appConf.getProperty(OptionKeys.ZOOKEEPER_HOSTS));
tokens.put("${DEFAULT_ZK_PATH}", appConf.getProperty(OptionKeys.ZOOKEEPER_PATH));
tokens.put(SERVICE_NAME_LC, clusterName.toLowerCase());
tokens.put(SERVICE_NAME, clusterName);
tokens.put(COMPONENT_NAME, roleInstance.role); tokens.put(COMPONENT_NAME, roleInstance.role);
tokens.put(COMPONENT_NAME_LC, roleInstance.role.toLowerCase()); tokens.put(COMPONENT_NAME_LC, roleInstance.role.toLowerCase());
tokens.put(COMPONENT_INSTANCE_NAME, roleInstance.getCompInstanceName()); tokens.put(COMPONENT_INSTANCE_NAME, roleInstance.getCompInstanceName());

View File

@ -88,10 +88,10 @@ public class DockerProviderService extends AbstractService
// Generate tokens (key-value pair) for config substitution. // Generate tokens (key-value pair) for config substitution.
// Get pre-defined tokens // Get pre-defined tokens
Map<String, String> globalTokens = amState.getGlobalSubstitutionTokens();
Map<String, String> tokensForSubstitution = providerUtils Map<String, String> tokensForSubstitution = providerUtils
.getStandardTokenMap(application.getConfiguration(), roleInstance, .initCompTokensForSubstitute(roleInstance);
application.getName()); tokensForSubstitution.putAll(globalTokens);
// Set the environment variables in launcher // Set the environment variables in launcher
launcher.putEnv(SliderUtils launcher.putEnv(SliderUtils
.buildEnvMap(component.getConfiguration(), tokensForSubstitution)); .buildEnvMap(component.getConfiguration(), tokensForSubstitution));
@ -111,7 +111,7 @@ public class DockerProviderService extends AbstractService
// create config file on hdfs and add local resource // create config file on hdfs and add local resource
providerUtils.createConfigFileAndAddLocalResource(launcher, fileSystem, providerUtils.createConfigFileAndAddLocalResource(launcher, fileSystem,
component, tokensForSubstitution, roleInstance); component, tokensForSubstitution, roleInstance, amState);
// substitute launch command // substitute launch command
String launchCommand = ProviderUtils String launchCommand = ProviderUtils

View File

@ -150,10 +150,10 @@ public class RoleLaunchService
containerLauncher.setupUGI(); containerLauncher.setupUGI();
containerLauncher.putEnv(envVars); containerLauncher.putEnv(envVars);
String failedInstance = role.failedInstanceName.poll(); RoleInstance failedInstance = role.failedInstances.poll();
RoleInstance instance; RoleInstance instance;
if (failedInstance != null) { if (failedInstance != null) {
instance = new RoleInstance(container, role, failedInstance); instance = new RoleInstance(container, failedInstance);
} else { } else {
instance = new RoleInstance(container, role); instance = new RoleInstance(container, role);
} }

View File

@ -819,6 +819,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
binding.releaseSelector = new MostRecentContainerReleaseSelector(); binding.releaseSelector = new MostRecentContainerReleaseSelector();
binding.nodeReports = nodeReports; binding.nodeReports = nodeReports;
binding.application = application; binding.application = application;
binding.serviceHdfsDir = fs.buildClusterDirPath(appName).toString();
appState.buildInstance(binding); appState.buildInstance(binding);
// build up environment variables that the AM wants set in every container // build up environment variables that the AM wants set in every container

View File

@ -24,11 +24,11 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader; import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache; import com.google.common.cache.LoadingCache;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerStatus;
@ -42,6 +42,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.slider.api.ClusterNode; import org.apache.slider.api.ClusterNode;
import org.apache.slider.api.InternalKeys; import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.ServiceApiConstants;
import org.apache.slider.api.StatusKeys; import org.apache.slider.api.StatusKeys;
import org.apache.slider.api.proto.Messages; import org.apache.slider.api.proto.Messages;
import org.apache.slider.api.proto.Messages.ComponentCountProto; import org.apache.slider.api.proto.Messages.ComponentCountProto;
@ -61,6 +62,7 @@ import org.apache.slider.core.exceptions.ErrorStrings;
import org.apache.slider.core.exceptions.NoSuchNodeException; import org.apache.slider.core.exceptions.NoSuchNodeException;
import org.apache.slider.core.exceptions.SliderInternalStateException; import org.apache.slider.core.exceptions.SliderInternalStateException;
import org.apache.slider.core.exceptions.TriggerClusterTeardownException; import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
import org.apache.slider.core.zk.ZKIntegration;
import org.apache.slider.providers.PlacementPolicy; import org.apache.slider.providers.PlacementPolicy;
import org.apache.slider.providers.ProviderRole; import org.apache.slider.providers.ProviderRole;
import org.apache.slider.server.appmaster.management.MetricsAndMonitoring; import org.apache.slider.server.appmaster.management.MetricsAndMonitoring;
@ -75,6 +77,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.net.URI;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -89,7 +92,12 @@ import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.registry.client.api.RegistryConstants.DEFAULT_REGISTRY_ZK_QUORUM;
import static org.apache.hadoop.registry.client.api.RegistryConstants.KEY_DNS_DOMAIN;
import static org.apache.hadoop.registry.client.api.RegistryConstants.KEY_REGISTRY_ZK_QUORUM;
import static org.apache.slider.api.ResourceKeys.*; import static org.apache.slider.api.ResourceKeys.*;
import static org.apache.slider.api.ServiceApiConstants.*;
import static org.apache.slider.api.StateValues.*; import static org.apache.slider.api.StateValues.*;
import static org.apache.slider.api.resource.ApplicationState.STARTED; import static org.apache.slider.api.resource.ApplicationState.STARTED;
@ -193,14 +201,13 @@ public class AppState {
private int containerMinMemory; private int containerMinMemory;
private RoleHistory roleHistory; private RoleHistory roleHistory;
private Configuration publishedProviderConf;
private long startTimeThreshold; private long startTimeThreshold;
private int failureThreshold = 10; private int failureThreshold = 10;
private int nodeFailureThreshold = 3; private int nodeFailureThreshold = 3;
private String logServerURL = ""; private String logServerURL = "";
public Map<String, String> globalTokens = new HashMap<>();
/** /**
* Selector of containers to release; application wide. * Selector of containers to release; application wide.
*/ */
@ -335,6 +342,7 @@ public class AppState {
DEFAULT_CONTAINER_FAILURE_THRESHOLD); DEFAULT_CONTAINER_FAILURE_THRESHOLD);
nodeFailureThreshold = conf.getPropertyInt(NODE_FAILURE_THRESHOLD, nodeFailureThreshold = conf.getPropertyInt(NODE_FAILURE_THRESHOLD,
DEFAULT_NODE_FAILURE_THRESHOLD); DEFAULT_NODE_FAILURE_THRESHOLD);
initGlobalTokensForSubstitute(binding);
//build the initial component list //build the initial component list
int priority = 1; int priority = 1;
@ -367,6 +375,34 @@ public class AppState {
createConfigFileCache(binding.fs); createConfigFileCache(binding.fs);
} }
/**
 * Populates {@code globalTokens} with the service-wide substitution tokens:
 * the ZK quorum, the per-service ZK path, the current user, the DNS domain
 * (only when configured), the cluster filesystem URI and its host (only when
 * configured), the service HDFS directory and the service name in both
 * original and lower-case form.
 *
 * @param binding binding info supplying {@code serviceConfig} and
 *                {@code serviceHdfsDir}
 * @throws IOException presumably when the current user cannot be resolved
 *                     via {@code UserGroupInformation} - TODO confirm
 */
private void initGlobalTokensForSubstitute(AppStateBindingInfo binding)
throws IOException {
// ZK
// quorum comes from the registry ZK quorum setting, falling back to the
// registry default when the key is unset
globalTokens.put(ServiceApiConstants.CLUSTER_ZK_QUORUM,
binding.serviceConfig
.getTrimmed(KEY_REGISTRY_ZK_QUORUM, DEFAULT_REGISTRY_ZK_QUORUM));
String user = UserGroupInformation.getCurrentUser().getShortUserName();
// the service ZK path is derived from the user and the application name
globalTokens
.put(SERVICE_ZK_PATH, ZKIntegration.mkClusterPath(user, app.getName()));
globalTokens.put(ServiceApiConstants.USER, user);
// the DOMAIN token is only registered when a non-empty DNS domain is set
String dnsDomain = binding.serviceConfig.getTrimmed(KEY_DNS_DOMAIN);
if (dnsDomain != null && !dnsDomain.isEmpty()) {
globalTokens.put(ServiceApiConstants.DOMAIN, dnsDomain);
}
// HDFS
// the filesystem tokens are only registered when a default fs is configured
String clusterFs = binding.serviceConfig.getTrimmed(FS_DEFAULT_NAME_KEY);
if (clusterFs != null && !clusterFs.isEmpty()) {
globalTokens.put(ServiceApiConstants.CLUSTER_FS_URI, clusterFs);
// NOTE(review): URI.getHost() returns null for URIs without a host
// component, so CLUSTER_FS_HOST may map to null here - confirm that
// downstream substitution tolerates a null token value
globalTokens.put(ServiceApiConstants.CLUSTER_FS_HOST,
URI.create(clusterFs).getHost());
}
globalTokens.put(SERVICE_HDFS_DIR, binding.serviceHdfsDir);
// service name
globalTokens.put(SERVICE_NAME_LC, app.getName().toLowerCase());
globalTokens.put(SERVICE_NAME, app.getName());
}
private void createConfigFileCache(final FileSystem fileSystem) { private void createConfigFileCache(final FileSystem fileSystem) {
this.configFileCache = this.configFileCache =
CacheBuilder.newBuilder().expireAfterAccess(10, TimeUnit.MINUTES) CacheBuilder.newBuilder().expireAfterAccess(10, TimeUnit.MINUTES)
@ -411,7 +447,7 @@ public class AppState {
DEF_YARN_LABEL_EXPRESSION); DEF_YARN_LABEL_EXPRESSION);
ProviderRole newRole = ProviderRole newRole =
new ProviderRole(name, group, priority, (int)placementPolicy, threshold, new ProviderRole(name, group, priority, (int)placementPolicy, threshold,
placementTimeout, label, component, this); placementTimeout, label, component);
buildRole(newRole, component); buildRole(newRole, component);
log.info("Created a new role " + newRole); log.info("Created a new role " + newRole);
return newRole; return newRole;
@ -1300,8 +1336,7 @@ public class AppState {
try { try {
RoleStatus roleStatus = lookupRoleStatus(roleInstance.roleId); RoleStatus roleStatus = lookupRoleStatus(roleInstance.roleId);
decRunningContainers(roleStatus); decRunningContainers(roleStatus);
roleStatus.getProviderRole().failedInstanceName roleStatus.getProviderRole().failedInstances.offer(roleInstance);
.offer(roleInstance.compInstanceName);
boolean shortLived = isShortLived(roleInstance); boolean shortLived = isShortLived(roleInstance);
String message; String message;
Container failedContainer = roleInstance.container; Container failedContainer = roleInstance.container;
@ -1742,8 +1777,7 @@ public class AppState {
for (RoleInstance possible : finalCandidates) { for (RoleInstance possible : finalCandidates) {
log.info("Targeting for release: {}", possible); log.info("Targeting for release: {}", possible);
containerReleaseSubmitted(possible.container); containerReleaseSubmitted(possible.container);
role.getProviderRole().failedInstanceName role.getProviderRole().failedInstances.offer(possible);
.offer(possible.compInstanceName);
operations.add(new ContainerReleaseOperation(possible.getContainerId())); operations.add(new ContainerReleaseOperation(possible.getContainerId()));
} }
} }
@ -1862,7 +1896,6 @@ public class AppState {
//get the role //get the role
final ContainerId cid = container.getId(); final ContainerId cid = container.getId();
final RoleStatus role = lookupRoleStatus(container); final RoleStatus role = lookupRoleStatus(container);
decRequestedContainers(role);
//inc allocated count -this may need to be dropped in a moment, //inc allocated count -this may need to be dropped in a moment,
// but is needed to update the logic below // but is needed to update the logic below
@ -1888,6 +1921,7 @@ public class AppState {
role.getComponentMetrics().surplusContainers.incr(); role.getComponentMetrics().surplusContainers.incr();
containersRunning.decr(); containersRunning.decr();
} else { } else {
decRequestedContainers(role);
log.info("Assigning role {} to container" + " {}," + " on {}:{},", log.info("Assigning role {} to container" + " {}," + " on {}:{},",
roleName, cid, nodeId.getHost(), nodeId.getPort()); roleName, cid, nodeId.getHost(), nodeId.getPort());

View File

@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.slider.api.resource.Application; import org.apache.slider.api.resource.Application;
import org.apache.slider.common.tools.CoreFileSystem;
import org.apache.slider.providers.ProviderRole; import org.apache.slider.providers.ProviderRole;
import java.util.ArrayList; import java.util.ArrayList;
@ -45,6 +46,7 @@ public class AppStateBindingInfo {
public Path historyPath; public Path historyPath;
public List<Container> liveContainers = new ArrayList<>(0); public List<Container> liveContainers = new ArrayList<>(0);
public ContainerReleaseSelector releaseSelector = new SimpleReleaseSelector(); public ContainerReleaseSelector releaseSelector = new SimpleReleaseSelector();
public String serviceHdfsDir = "";
/** node reports off the RM. */ /** node reports off the RM. */
public List<NodeReport> nodeReports = new ArrayList<>(0); public List<NodeReport> nodeReports = new ArrayList<>(0);

View File

@ -18,11 +18,13 @@
package org.apache.slider.server.appmaster.state; package org.apache.slider.server.appmaster.state;
import com.google.common.cache.LoadingCache;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.slider.api.ClusterNode; import org.apache.slider.api.ClusterNode;
import org.apache.slider.api.resource.Application; import org.apache.slider.api.resource.Application;
import org.apache.slider.api.resource.ConfigFile;
import org.apache.slider.api.types.ApplicationLivenessInformation; import org.apache.slider.api.types.ApplicationLivenessInformation;
import org.apache.slider.api.types.ComponentInformation; import org.apache.slider.api.types.ComponentInformation;
import org.apache.slider.api.types.NodeInformation; import org.apache.slider.api.types.NodeInformation;
@ -262,4 +264,14 @@ public class ProviderAppState implements StateAccessForProviders {
public RoleStatistics getRoleStatistics() { public RoleStatistics getRoleStatistics() {
return appState.getRoleStatistics(); return appState.getRoleStatistics();
} }
/**
 * Get global substitution tokens.
 * <p>
 * Delegates to the wrapped {@code appState} and hands back its
 * {@code globalTokens} member as-is; no copy is made at this layer.
 *
 * @return the {@code globalTokens} map held by {@code appState}
 */
@Override
public Map<String, String> getGlobalSubstitutionTokens() {
return appState.globalTokens;
}
/**
 * Get config file cache.
 * <p>
 * Delegates to the wrapped {@code appState} and hands back its
 * {@code configFileCache} member as-is; no wrapping is done at this layer.
 *
 * @return the {@code configFileCache} held by {@code appState}
 */
@Override
public LoadingCache<ConfigFile, Object> getConfigFileCache() {
return appState.configFileCache;
}
} }

View File

@ -128,11 +128,11 @@ public final class RoleInstance implements Cloneable {
this.providerRole = role; this.providerRole = role;
} }
public RoleInstance(Container container, ProviderRole role, public RoleInstance(Container container, RoleInstance failedInstance) {
String compInstanceName) {
this(container); this(container);
this.compInstanceName = compInstanceName; this.componentId = failedInstance.componentId;
this.providerRole = role; this.compInstanceName = failedInstance.compInstanceName;
this.providerRole = failedInstance.providerRole;
} }
/** /**

View File

@ -18,12 +18,14 @@
package org.apache.slider.server.appmaster.state; package org.apache.slider.server.appmaster.state;
import com.google.common.cache.LoadingCache;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.slider.api.ClusterNode; import org.apache.slider.api.ClusterNode;
import org.apache.slider.api.StatusKeys; import org.apache.slider.api.StatusKeys;
import org.apache.slider.api.resource.Application; import org.apache.slider.api.resource.Application;
import org.apache.slider.api.resource.ConfigFile;
import org.apache.slider.api.types.ApplicationLivenessInformation; import org.apache.slider.api.types.ApplicationLivenessInformation;
import org.apache.slider.api.types.ComponentInformation; import org.apache.slider.api.types.ComponentInformation;
import org.apache.slider.api.types.NodeInformation; import org.apache.slider.api.types.NodeInformation;
@ -260,4 +262,14 @@ public interface StateAccessForProviders {
* @return role statistics * @return role statistics
*/ */
RoleStatistics getRoleStatistics(); RoleStatistics getRoleStatistics();
/**
 * Get global substitution tokens.
 *
 * @return the substitution tokens as a string-to-string map
 *         (NOTE(review): presumably token name to replacement value;
 *         confirm against the code that populates it)
 */
Map<String, String> getGlobalSubstitutionTokens();
/**
 * Get config file cache.
 *
 * @return the loading cache keyed by {@link ConfigFile}
 *         (NOTE(review): the cached value is typed as {@code Object};
 *         what it actually holds is not visible here, so check the cache
 *         loader before casting)
 */
LoadingCache<ConfigFile, Object> getConfigFileCache();
} }

View File

@ -379,4 +379,52 @@ public class TestMockAppStateRMOperations extends BaseMockAppStateTest
assertNull(ri3); assertNull(ri3);
} }
/**
 * Allocate one container for role 0, start it, then feed the app state a
 * second allocation for the same request and verify that the surplus
 * container is answered with a single release operation while the role's
 * running/requested counters stay at 1/0.
 */
@Test
public void testDoubleAllocate() throws Throwable {
  // Round one: a single desired instance produces a request to satisfy.
  getRole0Status().setDesired(1);

  List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes();
  ContainerRequestOperation requestOp = (ContainerRequestOperation) ops.get(0);
  AMRMClient.ContainerRequest request = requestOp.getRequest();

  Container firstContainer = engine.allocateContainer(request);
  List<Container> firstAllocation = new ArrayList<>();
  firstAllocation.add(firstContainer);
  List<ContainerAssignment> firstAssignments = new ArrayList<>();
  List<AbstractRMOperation> firstOperations = new ArrayList<>();

  assertEquals(0L, getRole0Status().getRunning());
  assertEquals(1L, getRole0Status().getRequested());

  appState.onContainersAllocated(firstAllocation, firstAssignments,
      firstOperations);

  assertListLength(ops, 1);
  assertListLength(firstAssignments, 1);

  ContainerAssignment assignment = firstAssignments.get(0);
  Container assignedContainer = assignment.container;
  assertEquals(assignedContainer.getId(), firstContainer.getId());

  int assignedRoleId = assignment.role.getPriority();
  assertEquals(assignedRoleId, extractRole(request.getPriority()));
  assertEquals(assignment.role.getName(), ROLE0);

  // Report the container as submitted and then started.
  RoleInstance instance = roleInstance(assignment);
  appState.containerStartSubmitted(assignedContainer, instance);
  appState.innerOnNodeManagerContainerStarted(assignedContainer.getId());

  assertEquals(1L, getRole0Status().getRunning());
  assertEquals(0L, getRole0Status().getRequested());

  // Round two: an extra allocation for the same request must be released.
  Container surplusContainer = engine.allocateContainer(request);
  List<Container> secondAllocation = new ArrayList<>();
  secondAllocation.add(surplusContainer);
  List<ContainerAssignment> secondAssignments = new ArrayList<>();
  List<AbstractRMOperation> secondOperations = new ArrayList<>();

  appState.onContainersAllocated(secondAllocation, secondAssignments,
      secondOperations);

  assertListLength(secondOperations, 1);
  assertTrue(secondOperations.get(0) instanceof ContainerReleaseOperation);

  ContainerReleaseOperation releaseOp =
      (ContainerReleaseOperation) secondOperations.get(0);
  assertEquals(releaseOp.getContainerId(), surplusContainer.getId());

  // The counters are unchanged by the surplus allocation.
  assertEquals(1L, getRole0Status().getRunning());
  assertEquals(0L, getRole0Status().getRequested());
}
} }

View File

@ -99,6 +99,7 @@ public class TestMockAppStateUniqueNames extends BaseMockAppStateTest
assertEquals(roles[i], entry.getKey()); assertEquals(roles[i], entry.getKey());
RoleInstance instance = entry.getValue(); RoleInstance instance = entry.getValue();
assertEquals(roles[i], instance.compInstanceName); assertEquals(roles[i], instance.compInstanceName);
assertEquals(i, instance.componentId);
assertEquals(group, instance.role); assertEquals(group, instance.role);
assertEquals(group, instance.providerRole.name); assertEquals(group, instance.providerRole.name);
assertEquals(group, instance.providerRole.group); assertEquals(group, instance.providerRole.group);
@ -129,7 +130,6 @@ public class TestMockAppStateUniqueNames extends BaseMockAppStateTest
createAndStartNodes(); createAndStartNodes();
instances = appState.cloneOwnedContainerList(); instances = appState.cloneOwnedContainerList();
verifyInstances(instances, "group1", "group10", "group11", "group12"); verifyInstances(instances, "group1", "group10", "group11", "group12");
// fails because the names continue at N+1, with group12, group13, group14
} }
@Test @Test

View File

@ -176,11 +176,11 @@ public abstract class BaseMockAppStateTest extends SliderTestBase implements
*/ */
public RoleInstance roleInstance(ContainerAssignment assigned) { public RoleInstance roleInstance(ContainerAssignment assigned) {
Container target = assigned.container; Container target = assigned.container;
String failedInstance = RoleInstance failedInstance =
assigned.role.getProviderRole().failedInstanceName.poll(); assigned.role.getProviderRole().failedInstances.poll();
RoleInstance ri; RoleInstance ri;
if (failedInstance != null) { if (failedInstance != null) {
ri = new RoleInstance(target, assigned.role.getProviderRole(), failedInstance); ri = new RoleInstance(target, failedInstance);
} else { } else {
ri = new RoleInstance(target, assigned.role.getProviderRole()); ri = new RoleInstance(target, assigned.role.getProviderRole());
} }

View File

@ -811,6 +811,8 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
LOG.error("Incorrect format for ip and host"); LOG.error("Incorrect format for ip and host");
return null; return null;
} }
// strip off quotes if any
output = output.replaceAll("['\"]", "");
String ips = output.substring(0, index).trim(); String ips = output.substring(0, index).trim();
String host = output.substring(index+1).trim(); String host = output.substring(index+1).trim();
String[] ipAndHost = new String[2]; String[] ipAndHost = new String[2];