YARN-9719. Fixed YARN service restart bug when application ID no longer exists in RM.

Contributed by kyungwan nam
This commit is contained in:
Eric Yang 2019-08-12 18:24:00 -04:00
parent e4b538bbda
commit 201dc667e9
3 changed files with 43 additions and 1 deletions

View File

@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.client.cli.ApplicationCLI;
import org.apache.hadoop.yarn.client.util.YarnClientUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.proto.ClientAMProtocol.CancelUpgradeRequestProto;
@ -1558,7 +1559,17 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
return appSpec;
}
appSpec.setId(currentAppId.toString());
ApplicationReport appReport = yarnClient.getApplicationReport(currentAppId);
ApplicationReport appReport = null;
try {
appReport = yarnClient.getApplicationReport(currentAppId);
} catch (ApplicationNotFoundException e) {
LOG.info("application ID {} doesn't exist", currentAppId);
return appSpec;
}
if (appReport == null) {
LOG.warn("application ID {} is reported as null", currentAppId);
return appSpec;
}
appSpec.setState(convertState(appReport.getYarnApplicationState()));
ApplicationTimeout lifetime =
appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME);

View File

@ -218,6 +218,8 @@ public class ServiceTestUtils {
setConf(new YarnConfiguration());
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, false);
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, false);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
}
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
// reduce the teardown waiting time

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.service.api.records.Component;
@ -326,6 +327,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, true);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
setConf(conf);
setupInternal(NUM_NMS);
@ -518,6 +521,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
YarnConfiguration conf = new YarnConfiguration();
conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
setConf(conf);
setupInternal(3);
ServiceClient client = createClient(getConf());
@ -727,6 +732,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
YarnConfiguration conf = new YarnConfiguration();
conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
setConf(conf);
setupInternal(3);
ServiceClient client = createClient(getConf());
@ -909,4 +916,26 @@ public class TestYarnNativeServices extends ServiceTestUtils {
i++;
}
}
/**
 * Creates the example service, stops it, starts it again and checks that the
 * restarted service reports {@code ServiceState.STABLE}.
 *
 * <p>The cluster is configured with
 * {@code YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS} set to 0.
 * NOTE(review): presumably this makes the RM drop the application once it
 * completes, so the restart runs against an application ID the RM no longer
 * knows about -- confirm against the RM configuration documentation.
 *
 * @throws Exception if cluster setup or any service client action fails
 */
@Test (timeout = 200000)
public void testRestartServiceForNonExistingInRM() throws Exception {
  // Bring up the test cluster with zero retained completed applications.
  YarnConfiguration yarnConf = new YarnConfiguration();
  yarnConf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 0);
  setConf(yarnConf);
  setupInternal(NUM_NMS);

  // Launch the example service and wait until it is stable.
  ServiceClient serviceClient = createClient(getConf());
  Service exampleService = createExampleApplication();
  serviceClient.actionCreate(exampleService);
  waitForServiceToBeStable(serviceClient, exampleService);

  // Stopping may fail with ApplicationNotFoundException; that is tolerated
  // here because the restart path below is what this test exercises.
  try {
    serviceClient.actionStop(exampleService.getName(), true);
  } catch (ApplicationNotFoundException ignored) {
    LOG.info("ignore ApplicationNotFoundException during stopping");
  }

  // Restart the service and wait for it to stabilize again.
  serviceClient.actionStart(exampleService.getName());
  waitForServiceToBeStable(serviceClient, exampleService);

  Service restartedStatus = serviceClient.getStatus(exampleService.getName());
  Assert.assertEquals("Restarted service state should be STABLE",
      ServiceState.STABLE, restartedStatus.getState());
}
}