YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager gets connection issue. Contributed by Xuan Gong

(cherry picked from commit 214fd1408c21f596d1d15217c11b58b34561aab7)
This commit is contained in:
Jian He 2015-09-02 17:45:23 -07:00 committed by Sangjin Lee
parent cc30002bc8
commit c09bb46579
9 changed files with 43 additions and 3 deletions

View File

@ -1102,4 +1102,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
((appData == null) ? "null" : StringUtils.byteToHexString(appData)) + ((appData == null) ? "null" : StringUtils.byteToHexString(appData)) +
" cb=" + appClient; " cb=" + appClient;
} }
/**
 * Reports the elector's current ZooKeeper connection state as a string.
 *
 * @return the value of {@code zkConnectionState.name()}
 */
public String getHAZookeeperConnectionState() {
  return zkConnectionState.name();
}
} }

View File

@ -11,6 +11,9 @@ Release 2.6.2 - UNRELEASED
YARN-4092. Fixed UI redirection to print useful messages when both RMs are YARN-4092. Fixed UI redirection to print useful messages when both RMs are
in standby mode. (Xuan Gong via jianhe) in standby mode. (Xuan Gong via jianhe)
YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager
gets connection issue. (Xuan Gong via jianhe)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -722,4 +722,13 @@ public class AdminService extends CompositeService implements
throw RPCUtil.getRemoteException(ioe); throw RPCUtil.getRemoteException(ioe);
} }
} }
/**
 * Describes the HA ZooKeeper connection state of this ResourceManager.
 *
 * <p>When HA is disabled, or HA is enabled but automatic failover is not,
 * a fixed explanatory message is returned instead of a connection state.
 * Otherwise the answer is delegated to the embedded elector.
 *
 * @return an explanatory message, or the state string reported by
 *         {@code embeddedElector}
 */
public String getHAZookeeperConnectionState() {
  if (rmContext.isHAEnabled()) {
    if (autoFailoverEnabled) {
      // NOTE(review): assumes embeddedElector is non-null whenever
      // autoFailoverEnabled is true -- confirm against service init.
      return embeddedElector.getHAZookeeperConnectionState();
    }
    return "Auto Failover is not enabled.";
  }
  return "ResourceManager HA is not enabled.";
}
} }

View File

@ -205,4 +205,8 @@ public class EmbeddedElectorService extends AbstractService
elector.quitElection(false); elector.quitElection(false);
elector.joinElection(localActiveNodeInfo); elector.joinElection(localActiveNodeInfo);
} }
/**
 * Reports the HA ZooKeeper connection state by delegating to the
 * underlying {@code elector}.
 *
 * @return whatever {@code elector.getHAZookeeperConnectionState()} returns
 */
public String getHAZookeeperConnectionState() {
  return this.elector.getHAZookeeperConnectionState();
}
} }

View File

@ -44,6 +44,8 @@ public class AboutBlock extends HtmlBlock {
_("Cluster ID:", cinfo.getClusterId()). _("Cluster ID:", cinfo.getClusterId()).
_("ResourceManager state:", cinfo.getState()). _("ResourceManager state:", cinfo.getState()).
_("ResourceManager HA state:", cinfo.getHAState()). _("ResourceManager HA state:", cinfo.getHAState()).
_("ResourceManager HA zookeeper connection state:",
cinfo.getHAZookeeperConnectionState()).
_("ResourceManager RMStateStore:", cinfo.getRMStateStore()). _("ResourceManager RMStateStore:", cinfo.getRMStateStore()).
_("ResourceManager started on:", Times.format(cinfo.getStartedOn())). _("ResourceManager started on:", Times.format(cinfo.getStartedOn())).
_("ResourceManager version:", cinfo.getRMBuildVersion() + _("ResourceManager version:", cinfo.getRMBuildVersion() +

View File

@ -113,4 +113,9 @@ public class RMWebApp extends WebApp implements YarnWebParams {
} }
return path; return path;
} }
/**
 * Looks up the HA ZooKeeper connection state through the ResourceManager's
 * context and its admin service.
 *
 * @return the string produced by the admin service's
 *         {@code getHAZookeeperConnectionState()}
 */
public String getHAZookeeperConnectionState() {
  return rm
      .getRMContext()
      .getRMAdminService()
      .getHAZookeeperConnectionState();
}
} }

View File

@ -58,6 +58,7 @@ public class RMWebAppFilter extends GuiceContainer {
private String path; private String path;
private static final int BASIC_SLEEP_TIME = 5; private static final int BASIC_SLEEP_TIME = 5;
private static final int MAX_SLEEP_TIME = 5 * 60; private static final int MAX_SLEEP_TIME = 5 * 60;
private static final Random randnum = new Random();
@Inject @Inject
public RMWebAppFilter(Injector injector, Configuration conf) { public RMWebAppFilter(Injector injector, Configuration conf) {
@ -126,6 +127,8 @@ public class RMWebAppFilter extends GuiceContainer {
String redirectMsg = String redirectMsg =
doRetry ? "Can not find any active RM. Will retry in next " + next doRetry ? "Can not find any active RM. Will retry in next " + next
+ " seconds." : "There is no active RM right now."; + " seconds." : "There is no active RM right now.";
redirectMsg += "\nHA Zookeeper Connection State: "
+ rmWebApp.getHAZookeeperConnectionState();
PrintWriter out = response.getWriter(); PrintWriter out = response.getWriter();
out.println(redirectMsg); out.println(redirectMsg);
if (doRetry) { if (doRetry) {
@ -172,6 +175,6 @@ public class RMWebAppFilter extends GuiceContainer {
private static int calculateExponentialTime(int retries) { private static int calculateExponentialTime(int retries) {
long baseTime = BASIC_SLEEP_TIME * (1L << retries); long baseTime = BASIC_SLEEP_TIME * (1L << retries);
return (int) (baseTime * ((new Random()).nextDouble() + 0.5)); return (int) (baseTime * (randnum.nextDouble() + 0.5));
} }
} }

View File

@ -43,6 +43,7 @@ public class ClusterInfo {
protected String hadoopVersion; protected String hadoopVersion;
protected String hadoopBuildVersion; protected String hadoopBuildVersion;
protected String hadoopVersionBuiltOn; protected String hadoopVersionBuiltOn;
protected String haZooKeeperConnectionState;
public ClusterInfo() { public ClusterInfo() {
} // JAXB needs this } // JAXB needs this
@ -62,6 +63,8 @@ public class ClusterInfo {
this.hadoopVersion = VersionInfo.getVersion(); this.hadoopVersion = VersionInfo.getVersion();
this.hadoopBuildVersion = VersionInfo.getBuildVersion(); this.hadoopBuildVersion = VersionInfo.getBuildVersion();
this.hadoopVersionBuiltOn = VersionInfo.getDate(); this.hadoopVersionBuiltOn = VersionInfo.getDate();
this.haZooKeeperConnectionState =
rm.getRMContext().getRMAdminService().getHAZookeeperConnectionState();
} }
public String getState() { public String getState() {
@ -108,4 +111,7 @@ public class ClusterInfo {
return this.startedOn; return this.startedOn;
} }
/**
 * Returns the HA ZooKeeper connection state stored in this object.
 *
 * @return the value of the {@code haZooKeeperConnectionState} field
 */
public String getHAZookeeperConnectionState() {
  return haZooKeeperConnectionState;
}
} }

View File

@ -261,6 +261,8 @@ public class TestRMWebServices extends JerseyTest {
WebServicesTestUtils.getXmlLong(element, "startedOn"), WebServicesTestUtils.getXmlLong(element, "startedOn"),
WebServicesTestUtils.getXmlString(element, "state"), WebServicesTestUtils.getXmlString(element, "state"),
WebServicesTestUtils.getXmlString(element, "haState"), WebServicesTestUtils.getXmlString(element, "haState"),
WebServicesTestUtils.getXmlString(
element, "haZooKeeperConnectionState"),
WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"), WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"),
WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"), WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"),
WebServicesTestUtils.getXmlString(element, "hadoopVersion"), WebServicesTestUtils.getXmlString(element, "hadoopVersion"),
@ -276,9 +278,10 @@ public class TestRMWebServices extends JerseyTest {
Exception { Exception {
assertEquals("incorrect number of elements", 1, json.length()); assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("clusterInfo"); JSONObject info = json.getJSONObject("clusterInfo");
assertEquals("incorrect number of elements", 11, info.length()); assertEquals("incorrect number of elements", 12, info.length());
verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"), verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"),
info.getString("state"), info.getString("haState"), info.getString("state"), info.getString("haState"),
info.getString("haZooKeeperConnectionState"),
info.getString("hadoopVersionBuiltOn"), info.getString("hadoopVersionBuiltOn"),
info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"), info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"),
info.getString("resourceManagerVersionBuiltOn"), info.getString("resourceManagerVersionBuiltOn"),
@ -288,7 +291,8 @@ public class TestRMWebServices extends JerseyTest {
} }
public void verifyClusterGeneric(long clusterid, long startedon, public void verifyClusterGeneric(long clusterid, long startedon,
String state, String haState, String hadoopVersionBuiltOn, String state, String haState, String haZooKeeperConnectionState,
String hadoopVersionBuiltOn,
String hadoopBuildVersion, String hadoopVersion, String hadoopBuildVersion, String hadoopVersion,
String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion, String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion,
String resourceManagerVersion) { String resourceManagerVersion) {