YARN-9719. Fixed YARN service restart bug when application ID no longer exists in RM.

Contributed by kyungwan nam
This commit is contained in:
Eric Yang 2019-08-12 18:24:00 -04:00
parent e4b538bbda
commit 201dc667e9
3 changed files with 43 additions and 1 deletions

View File

@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.client.cli.ApplicationCLI; import org.apache.hadoop.yarn.client.cli.ApplicationCLI;
import org.apache.hadoop.yarn.client.util.YarnClientUtils; import org.apache.hadoop.yarn.client.util.YarnClientUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.proto.ClientAMProtocol.CancelUpgradeRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.CancelUpgradeRequestProto;
@ -1558,7 +1559,17 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
return appSpec; return appSpec;
} }
appSpec.setId(currentAppId.toString()); appSpec.setId(currentAppId.toString());
ApplicationReport appReport = yarnClient.getApplicationReport(currentAppId); ApplicationReport appReport = null;
try {
appReport = yarnClient.getApplicationReport(currentAppId);
} catch (ApplicationNotFoundException e) {
LOG.info("application ID {} doesn't exist", currentAppId);
return appSpec;
}
if (appReport == null) {
LOG.warn("application ID {} is reported as null", currentAppId);
return appSpec;
}
appSpec.setState(convertState(appReport.getYarnApplicationState())); appSpec.setState(convertState(appReport.getYarnApplicationState()));
ApplicationTimeout lifetime = ApplicationTimeout lifetime =
appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME); appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME);

View File

@ -218,6 +218,8 @@ public class ServiceTestUtils {
setConf(new YarnConfiguration()); setConf(new YarnConfiguration());
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, false); conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, false);
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, false); conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, false);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
} }
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
// reduce the teardown waiting time // reduce the teardown waiting time

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
import org.apache.hadoop.yarn.api.records.*; import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.service.api.records.Component; import org.apache.hadoop.yarn.service.api.records.Component;
@ -326,6 +327,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true); conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, true); conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, true);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
setConf(conf); setConf(conf);
setupInternal(NUM_NMS); setupInternal(NUM_NMS);
@ -518,6 +521,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
YarnConfiguration conf = new YarnConfiguration(); YarnConfiguration conf = new YarnConfiguration();
conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER, conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER); YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
setConf(conf); setConf(conf);
setupInternal(3); setupInternal(3);
ServiceClient client = createClient(getConf()); ServiceClient client = createClient(getConf());
@ -727,6 +732,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
YarnConfiguration conf = new YarnConfiguration(); YarnConfiguration conf = new YarnConfiguration();
conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER, conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER); YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
setConf(conf); setConf(conf);
setupInternal(3); setupInternal(3);
ServiceClient client = createClient(getConf()); ServiceClient client = createClient(getConf());
@ -909,4 +916,26 @@ public class TestYarnNativeServices extends ServiceTestUtils {
i++; i++;
} }
} }
/**
 * Creates the example service, stops it, starts it again and expects the
 * restarted service to report the STABLE state. The cluster is brought up
 * with {@code RM_MAX_COMPLETED_APPLICATIONS} set to 0.
 *
 * @throws Exception if any step of the cluster setup or service lifecycle
 *           fails
 */
@Test (timeout = 200000)
public void testRestartServiceForNonExistingInRM() throws Exception {
  // NOTE(review): a limit of 0 presumably makes the RM forget the
  // application once it completes, so the stored application ID is unknown
  // to the RM at restart time -- confirm against the RM documentation.
  YarnConfiguration yarnConf = new YarnConfiguration();
  yarnConf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 0);
  setConf(yarnConf);
  setupInternal(NUM_NMS);

  ServiceClient serviceClient = createClient(getConf());
  Service app = createExampleApplication();
  serviceClient.actionCreate(app);
  waitForServiceToBeStable(serviceClient, app);

  // Stopping may fail with ApplicationNotFoundException; that outcome is
  // tolerated here and only logged, because the restart is what is verified.
  try {
    serviceClient.actionStop(app.getName(), true);
  } catch (ApplicationNotFoundException notFound) {
    LOG.info("ignore ApplicationNotFoundException during stopping");
  }

  serviceClient.actionStart(app.getName());
  waitForServiceToBeStable(serviceClient, app);

  Assert.assertEquals("Restarted service state should be STABLE",
      ServiceState.STABLE,
      serviceClient.getStatus(app.getName()).getState());
}
} }