YARN-8411. Restart stopped system service during RM start.

Contributed by Billie Rinaldi
This commit is contained in:
Eric Yang 2018-06-13 19:05:52 -04:00
parent 7566e0ec5f
commit 69b0596897
5 changed files with 72 additions and 18 deletions

View File

@ -29,7 +29,9 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.service.SystemServiceManager; import org.apache.hadoop.yarn.server.service.SystemServiceManager;
import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.Service;
import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.conf.SliderExitCodes;
import org.apache.hadoop.yarn.service.conf.YarnServiceConf; import org.apache.hadoop.yarn.service.conf.YarnServiceConf;
import org.apache.hadoop.yarn.service.exceptions.SliderException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -228,14 +230,33 @@ public class SystemServiceManagerImpl extends AbstractService
userUgi.doAs(new PrivilegedExceptionAction<ApplicationId>() { userUgi.doAs(new PrivilegedExceptionAction<ApplicationId>() {
@Override public ApplicationId run() @Override public ApplicationId run()
throws IOException, YarnException { throws IOException, YarnException {
ApplicationId applicationId = serviceClient.actionCreate(service); boolean tryStart = true;
return applicationId; try {
serviceClient.actionBuild(service);
} catch (Exception e) {
if (e instanceof SliderException && ((SliderException) e)
.getExitCode() == SliderExitCodes.EXIT_INSTANCE_EXISTS) {
LOG.info("Service {} already exists, will attempt to start " +
"service", service.getName());
} else {
tryStart = false;
LOG.info("Got exception saving {}, will not attempt to " +
"start service", service.getName(), e);
}
}
if (tryStart) {
return serviceClient.actionStartAndGetId(service.getName());
} else {
return null;
}
} }
}); });
if (applicationId != null) {
LOG.info("Service {} submitted with Application ID: {}", LOG.info("Service {} submitted with Application ID: {}",
service.getName(), applicationId); service.getName(), applicationId);
} }
} }
}
ServiceClient getServiceClient() { ServiceClient getServiceClient() {
return new ServiceClient(); return new ServiceClient();

View File

@ -648,8 +648,7 @@ public class ApiServer {
ServiceClient sc = getServiceClient(); ServiceClient sc = getServiceClient();
sc.init(YARN_CONFIG); sc.init(YARN_CONFIG);
sc.start(); sc.start();
sc.actionStart(appName); ApplicationId appId = sc.actionStartAndGetId(appName);
ApplicationId appId = sc.getAppId(appName);
sc.close(); sc.close();
return appId; return appId;
} }

View File

@ -103,13 +103,13 @@ public class ServiceClientTest extends ServiceClient {
} }
@Override @Override
public int actionStart(String serviceName) public ApplicationId actionStartAndGetId(String serviceName)
throws YarnException, IOException { throws YarnException, IOException {
if (serviceName != null && serviceName.equals("jenkins")) { if (serviceName != null && serviceName.equals("jenkins")) {
ApplicationId appId = ApplicationId appId =
ApplicationId.newInstance(System.currentTimeMillis(), 1); ApplicationId.newInstance(System.currentTimeMillis(), 1);
serviceAppId.put(serviceName, appId); serviceAppId.put(serviceName, appId);
return EXIT_SUCCESS; return appId;
} else { } else {
throw new ApplicationNotFoundException(""); throw new ApplicationNotFoundException("");
} }

View File

@ -22,7 +22,9 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.Service;
import org.apache.hadoop.yarn.service.conf.SliderExitCodes;
import org.apache.hadoop.yarn.service.conf.YarnServiceConf; import org.apache.hadoop.yarn.service.conf.YarnServiceConf;
import org.apache.hadoop.yarn.service.exceptions.SliderException;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
@ -51,6 +53,7 @@ public class TestSystemServiceManagerImpl {
private String[] users = new String[] {"user1", "user2"}; private String[] users = new String[] {"user1", "user2"};
private static Map<String, Set<String>> loadedServices = new HashMap<>(); private static Map<String, Set<String>> loadedServices = new HashMap<>();
private static Map<String, Set<String>> savedServices = new HashMap<>();
private static Map<String, Set<String>> submittedServices = new HashMap<>(); private static Map<String, Set<String>> submittedServices = new HashMap<>();
@Before @Before
@ -72,7 +75,7 @@ public class TestSystemServiceManagerImpl {
} }
@After @After
public void teadDown() { public void tearDown() {
systemService.stop(); systemService.stop();
} }
@ -102,6 +105,11 @@ public class TestSystemServiceManagerImpl {
// 2nd time launch service to handle if service exist scenario // 2nd time launch service to handle if service exist scenario
systemService.launchUserService(userServices); systemService.launchUserService(userServices);
verifyForLaunchedUserServices(); verifyForLaunchedUserServices();
// verify start of stopped services
submittedServices.clear();
systemService.launchUserService(userServices);
verifyForLaunchedUserServices();
} }
private void verifyForScannedUserServices( private void verifyForScannedUserServices(
@ -149,7 +157,27 @@ public class TestSystemServiceManagerImpl {
} }
@Override @Override
public ApplicationId actionCreate(Service service) public int actionBuild(Service service)
throws YarnException, IOException {
String userName =
UserGroupInformation.getCurrentUser().getShortUserName();
Set<String> services = savedServices.get(userName);
if (services == null) {
services = new HashSet<>();
savedServices.put(userName, services);
}
if (services.contains(service.getName())) {
String message = "Failed to save service " + service.getName()
+ ", because it already exists.";
throw new SliderException(SliderExitCodes.EXIT_INSTANCE_EXISTS,
message);
}
services.add(service.getName());
return 0;
}
@Override
public ApplicationId actionStartAndGetId(String serviceName)
throws YarnException, IOException { throws YarnException, IOException {
String userName = String userName =
UserGroupInformation.getCurrentUser().getShortUserName(); UserGroupInformation.getCurrentUser().getShortUserName();
@ -158,12 +186,12 @@ public class TestSystemServiceManagerImpl {
services = new HashSet<>(); services = new HashSet<>();
submittedServices.put(userName, services); submittedServices.put(userName, services);
} }
if (services.contains(service.getName())) { if (services.contains(serviceName)) {
String message = "Failed to create service " + service.getName() String message = "Failed to create service " + serviceName
+ ", because it already exists."; + ", because it is already running.";
throw new YarnException(message); throw new YarnException(message);
} }
services.add(service.getName()); services.add(serviceName);
return ApplicationId.newInstance(System.currentTimeMillis(), 1); return ApplicationId.newInstance(System.currentTimeMillis(), 1);
} }
} }

View File

@ -1003,6 +1003,12 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
@Override @Override
public int actionStart(String serviceName) throws YarnException, IOException { public int actionStart(String serviceName) throws YarnException, IOException {
actionStartAndGetId(serviceName);
return EXIT_SUCCESS;
}
public ApplicationId actionStartAndGetId(String serviceName) throws
YarnException, IOException {
ServiceApiUtil.validateNameFormat(serviceName, getConfig()); ServiceApiUtil.validateNameFormat(serviceName, getConfig());
Service liveService = getStatus(serviceName); Service liveService = getStatus(serviceName);
if (liveService == null || if (liveService == null ||
@ -1019,11 +1025,11 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
// write app definition on to hdfs // write app definition on to hdfs
Path appJson = ServiceApiUtil.writeAppDefinition(fs, appDir, service); Path appJson = ServiceApiUtil.writeAppDefinition(fs, appDir, service);
LOG.info("Persisted service " + service.getName() + " at " + appJson); LOG.info("Persisted service " + service.getName() + " at " + appJson);
return 0; return appId;
} else { } else {
LOG.info("Finalize service {} upgrade"); LOG.info("Finalize service {} upgrade");
ApplicationReport appReport = ApplicationId appId = getAppId(serviceName);
yarnClient.getApplicationReport(getAppId(serviceName)); ApplicationReport appReport = yarnClient.getApplicationReport(appId);
if (StringUtils.isEmpty(appReport.getHost())) { if (StringUtils.isEmpty(appReport.getHost())) {
throw new YarnException(serviceName + " AM hostname is empty"); throw new YarnException(serviceName + " AM hostname is empty");
} }
@ -1032,7 +1038,7 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
RestartServiceRequestProto.Builder requestBuilder = RestartServiceRequestProto.Builder requestBuilder =
RestartServiceRequestProto.newBuilder(); RestartServiceRequestProto.newBuilder();
proxy.restart(requestBuilder.build()); proxy.restart(requestBuilder.build());
return 0; return appId;
} }
} }