YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager gets connection issue. Contributed by Xuan Gong

(cherry picked from commit 09c64ba1ba)
This commit is contained in:
Jian He 2015-09-02 17:45:23 -07:00
parent c2ed7e4a09
commit 2d1ff2e1ca
9 changed files with 43 additions and 3 deletions

View File

@ -1118,4 +1118,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
((appData == null) ? "null" : StringUtils.byteToHexString(appData)) + ((appData == null) ? "null" : StringUtils.byteToHexString(appData)) +
" cb=" + appClient; " cb=" + appClient;
} }
/**
 * Exposes the elector's current ZooKeeper connection state as text.
 *
 * @return the value of {@code name()} invoked on the
 *         {@code zkConnectionState} field
 */
public String getHAZookeeperConnectionState() {
  return zkConnectionState.name();
}
} }

View File

@ -64,6 +64,9 @@ Release 2.7.2 - UNRELEASED
YARN-3893. Both RM in active state when Admin#transitionToActive failure YARN-3893. Both RM in active state when Admin#transitionToActive failure
from refeshAll() (Bibin A Chundatt via rohithsharmaks) from refeshAll() (Bibin A Chundatt via rohithsharmaks)
YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager
gets connection issue. (Xuan Gong via jianhe)
Release 2.7.1 - 2015-07-06 Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -701,4 +701,13 @@ public class AdminService extends CompositeService implements
"AdminService", "Exception " + msg); "AdminService", "Exception " + msg);
return RPCUtil.getRemoteException(ioe); return RPCUtil.getRemoteException(ioe);
} }
/**
 * Reports the ZooKeeper connection state used for ResourceManager HA, or a
 * short explanatory message when no such state can be obtained.
 *
 * @return {@code "ResourceManager HA is not enabled."} when the RM context
 *         reports HA as disabled; {@code "Auto Failover is not enabled."}
 *         when automatic failover is off;
 *         {@code "Embedded leader elector is not available."} when no
 *         embedded elector instance is present; otherwise the state string
 *         supplied by the embedded elector
 */
public String getHAZookeeperConnectionState() {
  if (!rmContext.isHAEnabled()) {
    return "ResourceManager HA is not enabled.";
  } else if (!autoFailoverEnabled) {
    return "Auto Failover is not enabled.";
  } else if (embeddedElector == null) {
    // NOTE(review): presumably the elector is only created when automatic
    // failover is configured as embedded -- confirm against the service
    // initialization code. Guarding here keeps callers such as the web UI
    // from failing with a NullPointerException in that configuration.
    return "Embedded leader elector is not available.";
  }
  return embeddedElector.getHAZookeeperConnectionState();
}
} }

View File

@ -205,4 +205,8 @@ public class EmbeddedElectorService extends AbstractService
elector.quitElection(false); elector.quitElection(false);
elector.joinElection(localActiveNodeInfo); elector.joinElection(localActiveNodeInfo);
} }
/**
 * Forwards the ZooKeeper connection state query to the wrapped elector.
 *
 * @return whatever {@code elector.getHAZookeeperConnectionState()} returns
 */
public String getHAZookeeperConnectionState() {
  final String connectionState = elector.getHAZookeeperConnectionState();
  return connectionState;
}
} }

View File

@ -44,6 +44,8 @@ public class AboutBlock extends HtmlBlock {
_("Cluster ID:", cinfo.getClusterId()). _("Cluster ID:", cinfo.getClusterId()).
_("ResourceManager state:", cinfo.getState()). _("ResourceManager state:", cinfo.getState()).
_("ResourceManager HA state:", cinfo.getHAState()). _("ResourceManager HA state:", cinfo.getHAState()).
_("ResourceManager HA zookeeper connection state:",
cinfo.getHAZookeeperConnectionState()).
_("ResourceManager RMStateStore:", cinfo.getRMStateStore()). _("ResourceManager RMStateStore:", cinfo.getRMStateStore()).
_("ResourceManager started on:", Times.format(cinfo.getStartedOn())). _("ResourceManager started on:", Times.format(cinfo.getStartedOn())).
_("ResourceManager version:", cinfo.getRMBuildVersion() + _("ResourceManager version:", cinfo.getRMBuildVersion() +

View File

@ -114,4 +114,9 @@ public class RMWebApp extends WebApp implements YarnWebParams {
} }
return path; return path;
} }
/**
 * Looks up the HA ZooKeeper connection state through the ResourceManager's
 * admin service.
 *
 * @return the string produced by the admin service reached via
 *         {@code rm.getRMContext().getRMAdminService()}
 */
public String getHAZookeeperConnectionState() {
  final String connectionState =
      rm.getRMContext().getRMAdminService().getHAZookeeperConnectionState();
  return connectionState;
}
} }

View File

@ -58,6 +58,7 @@ public class RMWebAppFilter extends GuiceContainer {
private String path; private String path;
private static final int BASIC_SLEEP_TIME = 5; private static final int BASIC_SLEEP_TIME = 5;
private static final int MAX_SLEEP_TIME = 5 * 60; private static final int MAX_SLEEP_TIME = 5 * 60;
private static final Random randnum = new Random();
@Inject @Inject
public RMWebAppFilter(Injector injector, Configuration conf) { public RMWebAppFilter(Injector injector, Configuration conf) {
@ -126,6 +127,8 @@ public class RMWebAppFilter extends GuiceContainer {
String redirectMsg = String redirectMsg =
doRetry ? "Can not find any active RM. Will retry in next " + next doRetry ? "Can not find any active RM. Will retry in next " + next
+ " seconds." : "There is no active RM right now."; + " seconds." : "There is no active RM right now.";
redirectMsg += "\nHA Zookeeper Connection State: "
+ rmWebApp.getHAZookeeperConnectionState();
PrintWriter out = response.getWriter(); PrintWriter out = response.getWriter();
out.println(redirectMsg); out.println(redirectMsg);
if (doRetry) { if (doRetry) {
@ -172,6 +175,6 @@ public class RMWebAppFilter extends GuiceContainer {
private static int calculateExponentialTime(int retries) { private static int calculateExponentialTime(int retries) {
long baseTime = BASIC_SLEEP_TIME * (1L << retries); long baseTime = BASIC_SLEEP_TIME * (1L << retries);
return (int) (baseTime * ((new Random()).nextDouble() + 0.5)); return (int) (baseTime * (randnum.nextDouble() + 0.5));
} }
} }

View File

@ -43,6 +43,7 @@ public class ClusterInfo {
protected String hadoopVersion; protected String hadoopVersion;
protected String hadoopBuildVersion; protected String hadoopBuildVersion;
protected String hadoopVersionBuiltOn; protected String hadoopVersionBuiltOn;
protected String haZooKeeperConnectionState;
public ClusterInfo() { public ClusterInfo() {
} // JAXB needs this } // JAXB needs this
@ -62,6 +63,8 @@ public class ClusterInfo {
this.hadoopVersion = VersionInfo.getVersion(); this.hadoopVersion = VersionInfo.getVersion();
this.hadoopBuildVersion = VersionInfo.getBuildVersion(); this.hadoopBuildVersion = VersionInfo.getBuildVersion();
this.hadoopVersionBuiltOn = VersionInfo.getDate(); this.hadoopVersionBuiltOn = VersionInfo.getDate();
this.haZooKeeperConnectionState =
rm.getRMContext().getRMAdminService().getHAZookeeperConnectionState();
} }
public String getState() { public String getState() {
@ -108,4 +111,7 @@ public class ClusterInfo {
return this.startedOn; return this.startedOn;
} }
/**
 * Accessor for the HA ZooKeeper connection state held by this object.
 *
 * @return the current value of the {@code haZooKeeperConnectionState} field
 */
public String getHAZookeeperConnectionState() {
  return haZooKeeperConnectionState;
}
} }

View File

@ -277,6 +277,8 @@ public class TestRMWebServices extends JerseyTestBase {
WebServicesTestUtils.getXmlLong(element, "startedOn"), WebServicesTestUtils.getXmlLong(element, "startedOn"),
WebServicesTestUtils.getXmlString(element, "state"), WebServicesTestUtils.getXmlString(element, "state"),
WebServicesTestUtils.getXmlString(element, "haState"), WebServicesTestUtils.getXmlString(element, "haState"),
WebServicesTestUtils.getXmlString(
element, "haZooKeeperConnectionState"),
WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"), WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"),
WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"), WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"),
WebServicesTestUtils.getXmlString(element, "hadoopVersion"), WebServicesTestUtils.getXmlString(element, "hadoopVersion"),
@ -292,9 +294,10 @@ public class TestRMWebServices extends JerseyTestBase {
Exception { Exception {
assertEquals("incorrect number of elements", 1, json.length()); assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("clusterInfo"); JSONObject info = json.getJSONObject("clusterInfo");
assertEquals("incorrect number of elements", 11, info.length()); assertEquals("incorrect number of elements", 12, info.length());
verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"), verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"),
info.getString("state"), info.getString("haState"), info.getString("state"), info.getString("haState"),
info.getString("haZooKeeperConnectionState"),
info.getString("hadoopVersionBuiltOn"), info.getString("hadoopVersionBuiltOn"),
info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"), info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"),
info.getString("resourceManagerVersionBuiltOn"), info.getString("resourceManagerVersionBuiltOn"),
@ -304,7 +307,8 @@ public class TestRMWebServices extends JerseyTestBase {
} }
public void verifyClusterGeneric(long clusterid, long startedon, public void verifyClusterGeneric(long clusterid, long startedon,
String state, String haState, String hadoopVersionBuiltOn, String state, String haState, String haZooKeeperConnectionState,
String hadoopVersionBuiltOn,
String hadoopBuildVersion, String hadoopVersion, String hadoopBuildVersion, String hadoopVersion,
String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion, String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion,
String resourceManagerVersion) { String resourceManagerVersion) {