YARN-4307. Display blacklisted nodes for AM container in the RM web UI. Contributed by Naganarasimha G R.

This commit is contained in:
Varun Vasudev 2016-02-04 13:32:54 +05:30
parent 63c63e298c
commit 308d63f382
10 changed files with 83 additions and 53 deletions

View File

@ -122,6 +122,9 @@ Release 2.9.0 - UNRELEASED
ApplicationSubmissionContextInfo more consistent. ApplicationSubmissionContextInfo more consistent.
(Xuan Gong via vvasudev) (Xuan Gong via vvasudev)
YARN-4307. Display blacklisted nodes for AM container in the RM web UI.
(Naganarasimha G R via vvasudev)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -18,10 +18,13 @@
// Generated by HamletGen. Do NOT edit! // Generated by HamletGen. Do NOT edit!
package org.apache.hadoop.yarn.webapp.hamlet; package org.apache.hadoop.yarn.webapp.hamlet;
import static java.util.EnumSet.of;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.ENDTAG;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.INLINE;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.PRE;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.EnumSet; import java.util.EnumSet;
import static java.util.EnumSet.*;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.*;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.SubView;
@ -2405,6 +2408,10 @@ public TR<T> th(String selector, String cdata) {
return setSelector(th(), selector)._(cdata)._(); return setSelector(th(), selector)._(cdata)._();
} }
/**
 * Appends a TH element to this row, applying the given selector and
 * title attribute, writing the given character data as its content and
 * then closing the element.
 *
 * @param selector the css selector in the form of (#id)?(.class)*
 * @param title the value for the title attribute of the TH element
 * @param cdata the content of the TH element
 * @return the current element builder
 */
public TR<T> th(String selector, String title, String cdata) {
  return setSelector(th(), selector)
      .$title(title)
      ._(cdata)
      ._();
}
@Override @Override
public TD<TR<T>> td() { public TD<TR<T>> td() {
closeAttrs(); closeAttrs();

View File

@ -72,11 +72,10 @@ public BlacklistUpdates getBlacklistUpdates() {
} }
ret = new BlacklistUpdates(blacklist, EMPTY_LIST); ret = new BlacklistUpdates(blacklist, EMPTY_LIST);
} else { } else {
if (LOG.isDebugEnabled()) { LOG.warn("Ignoring Blacklists, blacklist size " + currentBlacklistSize
LOG.debug("blacklist size " + currentBlacklistSize + " is more than " + + " is more than failure threshold ratio "
"failure threshold ratio " + blacklistDisableFailureThreshold + + blacklistDisableFailureThreshold + " out of total usable nodes "
" out of total usable nodes " + numberOfNodeManagerHosts); + numberOfNodeManagerHosts);
}
ret = new BlacklistUpdates(EMPTY_LIST, blacklist); ret = new BlacklistUpdates(EMPTY_LIST, blacklist);
} }
return ret; return ret;

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
import javax.crypto.SecretKey; import javax.crypto.SecretKey;
@ -252,4 +253,9 @@ public interface RMAppAttempt extends EventHandler<RMAppAttemptEvent> {
* @param amLaunchDiagnostics * @param amLaunchDiagnostics
*/ */
void updateAMLaunchDiagnostics(String amLaunchDiagnostics); void updateAMLaunchDiagnostics(String amLaunchDiagnostics);
/**
 * Returns the nodes that have been blacklisted by the application.
 *
 * @return Set of nodes which are blacklisted by the application
 */
Set<String> getBlacklistedNodes();
} }

View File

@ -28,6 +28,7 @@
import java.util.Collections; import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
@ -91,7 +92,9 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@ -2142,4 +2145,18 @@ public RMAppAttemptState getRecoveredFinalState() {
public void setRecoveredFinalState(RMAppAttemptState finalState) { public void setRecoveredFinalState(RMAppAttemptState finalState) {
this.recoveredFinalState = finalState; this.recoveredFinalState = finalState;
} }
/**
 * Returns the nodes blacklisted by the application for this attempt.
 *
 * The blacklist is read from the scheduler's view of this attempt, which is
 * only reachable when the scheduler is an {@link AbstractYarnScheduler}.
 *
 * @return the scheduler attempt's blacklisted nodes, or an empty set when
 *         the scheduler is not an {@link AbstractYarnScheduler} or it has no
 *         record of this attempt; never {@code null}
 */
@Override
public Set<String> getBlacklistedNodes() {
  if (scheduler instanceof AbstractYarnScheduler) {
    AbstractYarnScheduler ayScheduler =
        (AbstractYarnScheduler) scheduler;
    SchedulerApplicationAttempt attempt =
        ayScheduler.getApplicationAttempt(applicationAttemptId);
    if (attempt != null) {
      return attempt.getBlacklistedNodes();
    }
  }
  // Type-safe empty set: the raw Collections.EMPTY_SET constant would need
  // an unchecked conversion to Set<String>.
  return Collections.<String>emptySet();
}
} }

View File

@ -23,9 +23,11 @@
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD; import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH; import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH;
import java.util.Collection;
import java.util.List;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.ContainerReport;
@ -36,8 +38,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock; import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock;
import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo;
@ -48,11 +48,8 @@
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.apache.hadoop.yarn.webapp.view.InfoBlock; import org.apache.hadoop.yarn.webapp.view.InfoBlock;
import com.google.inject.Inject;
import java.util.List;
import java.util.Collection; import com.google.inject.Inject;
import java.util.Set;
public class RMAppAttemptBlock extends AppAttemptBlock{ public class RMAppAttemptBlock extends AppAttemptBlock{
@ -207,14 +204,13 @@ protected void generateOverview(ApplicationAttemptReport appAttemptReport,
Collection<ContainerReport> containers, AppAttemptInfo appAttempt, Collection<ContainerReport> containers, AppAttemptInfo appAttempt,
String node) { String node) {
String blacklistedNodes = "-"; RMAppAttempt rmAppAttempt = getRMAppAttempt();
Set<String> nodes = // nodes which are blacklisted by the application
getBlacklistedNodes(rm, getRMAppAttempt().getAppAttemptId()); String appBlacklistedNodes =
if (nodes != null) { getNodeString(rmAppAttempt.getBlacklistedNodes());
if (!nodes.isEmpty()) { // nodes which are blacklisted by the RM for AM launches
blacklistedNodes = StringUtils.join(nodes, ", "); String rmBlackListedNodes = getNodeString(
} rmAppAttempt.getAMBlacklist().getBlacklistUpdates().getAdditions());
}
info("Application Attempt Overview") info("Application Attempt Overview")
._( ._(
@ -248,21 +244,17 @@ protected void generateOverview(ApplicationAttemptReport appAttemptReport,
._( ._(
"Diagnostics Info:", "Diagnostics Info:",
appAttempt.getDiagnosticsInfo() == null ? "" : appAttempt appAttempt.getDiagnosticsInfo() == null ? "" : appAttempt
.getDiagnosticsInfo())._("Blacklisted Nodes:", blacklistedNodes); .getDiagnosticsInfo())
._("Application Blacklisted Nodes:", appBlacklistedNodes)
._("RM Blacklisted Nodes(for AM launches)", rmBlackListedNodes);
} }
public static Set<String> getBlacklistedNodes(ResourceManager rm, private String getNodeString(Collection<String> nodes) {
ApplicationAttemptId appid) { String concatinatedString = "-";
if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) { if (null != nodes && !nodes.isEmpty()) {
AbstractYarnScheduler ayScheduler = concatinatedString = StringUtils.join(nodes, ", ");
(AbstractYarnScheduler) rm.getResourceScheduler();
SchedulerApplicationAttempt attempt =
ayScheduler.getApplicationAttempt(appid);
if (attempt != null) {
return attempt.getBlacklistedNodes();
}
} }
return null; return concatinatedString;
} }
@Override @Override

View File

@ -20,9 +20,10 @@
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP; import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
import java.util.Collection;
import java.util.Set;
import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
@ -43,12 +44,8 @@
import com.google.inject.Inject; import com.google.inject.Inject;
import java.util.Collection;
import java.util.Set;
public class RMAppBlock extends AppBlock{ public class RMAppBlock extends AppBlock{
private static final Log LOG = LogFactory.getLog(RMAppBlock.class);
private final ResourceManager rm; private final ResourceManager rm;
private final Configuration conf; private final Configuration conf;
@ -116,7 +113,10 @@ protected void generateApplicationTable(Block html,
Hamlet.TBODY<Hamlet.TABLE<Hamlet>> tbody = Hamlet.TBODY<Hamlet.TABLE<Hamlet>> tbody =
html.table("#attempts").thead().tr().th(".id", "Attempt ID") html.table("#attempts").thead().tr().th(".id", "Attempt ID")
.th(".started", "Started").th(".node", "Node").th(".logs", "Logs") .th(".started", "Started").th(".node", "Node").th(".logs", "Logs")
.th(".blacklistednodes", "Blacklisted Nodes")._()._().tbody(); .th(".appBlacklistednodes", "Nodes black listed by the application",
"App Blacklisted Nodes")
.th(".rmBlacklistednodes", "Nodes black listed by the RM for the"
+ " app", "RM Blacklisted Nodes")._()._().tbody();
RMApp rmApp = this.rm.getRMContext().getRMApps().get(this.appID); RMApp rmApp = this.rm.getRMContext().getRMApps().get(this.appID);
if (rmApp == null) { if (rmApp == null) {
@ -132,13 +132,12 @@ protected void generateApplicationTable(Block html,
AppAttemptInfo attemptInfo = AppAttemptInfo attemptInfo =
new AppAttemptInfo(this.rm, rmAppAttempt, rmApp.getUser(), new AppAttemptInfo(this.rm, rmAppAttempt, rmApp.getUser(),
WebAppUtils.getHttpSchemePrefix(conf)); WebAppUtils.getHttpSchemePrefix(conf));
String blacklistedNodesCount = "N/A"; Set<String> nodes = rmAppAttempt.getBlacklistedNodes();
Set<String> nodes = // nodes which are blacklisted by the application
RMAppAttemptBlock.getBlacklistedNodes(rm, String appBlacklistedNodesCount = String.valueOf(nodes.size());
rmAppAttempt.getAppAttemptId()); // nodes which are blacklisted by the RM for AM launches
if(nodes != null) { String rmBlacklistedNodesCount = String.valueOf(rmAppAttempt
blacklistedNodesCount = String.valueOf(nodes.size()); .getAMBlacklist().getBlacklistUpdates().getAdditions().size());
}
String nodeLink = attemptInfo.getNodeHttpAddress(); String nodeLink = attemptInfo.getNodeHttpAddress();
if (nodeLink != null) { if (nodeLink != null) {
nodeLink = WebAppUtils.getHttpSchemePrefix(conf) + nodeLink; nodeLink = WebAppUtils.getHttpSchemePrefix(conf) + nodeLink;
@ -158,8 +157,9 @@ protected void generateApplicationTable(Block html,
.escapeJavaScript(StringEscapeUtils.escapeHtml(nodeLink))) .escapeJavaScript(StringEscapeUtils.escapeHtml(nodeLink)))
.append("</a>\",\"<a ") .append("</a>\",\"<a ")
.append(logsLink == null ? "#" : "href='" + logsLink).append("'>") .append(logsLink == null ? "#" : "href='" + logsLink).append("'>")
.append(logsLink == null ? "N/A" : "Logs").append("</a>\",").append( .append(logsLink == null ? "N/A" : "Logs").append("</a>\",")
"\"").append(blacklistedNodesCount).append("\"],\n"); .append("\"").append(appBlacklistedNodesCount).append("\",")
.append("\"").append(rmBlacklistedNodesCount).append("\"],\n");
} }
if (attemptsTableData.charAt(attemptsTableData.length() - 2) == ',') { if (attemptsTableData.charAt(attemptsTableData.length() - 2) == ',') {
attemptsTableData.delete(attemptsTableData.length() - 2, attemptsTableData.delete(attemptsTableData.length() - 2,

View File

@ -95,8 +95,9 @@ protected void renderData(Block html) {
} }
String blacklistedNodesCount = "N/A"; String blacklistedNodesCount = "N/A";
Set<String> nodes = Set<String> nodes = rm.getRMContext().getRMApps()
RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId); .get(appAttemptId.getApplicationId()).getAppAttempts()
.get(appAttemptId).getBlacklistedNodes();
if (nodes != null) { if (nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size()); blacklistedNodesCount = String.valueOf(nodes.size());
} }

View File

@ -42,6 +42,7 @@ public class AppAttemptInfo {
protected String nodeId; protected String nodeId;
protected String logsLink; protected String logsLink;
protected String blacklistedNodes; protected String blacklistedNodes;
protected String rmBlacklistedNodesForAMLaunches;
protected String appAttemptId; protected String appAttemptId;
public AppAttemptInfo() { public AppAttemptInfo() {
@ -67,6 +68,10 @@ public AppAttemptInfo(ResourceManager rm, RMAppAttempt attempt, String user,
this.logsLink = WebAppUtils.getRunningLogURL(schemePrefix this.logsLink = WebAppUtils.getRunningLogURL(schemePrefix
+ masterContainer.getNodeHttpAddress(), + masterContainer.getNodeHttpAddress(),
ConverterUtils.toString(masterContainer.getId()), user); ConverterUtils.toString(masterContainer.getId()), user);
rmBlacklistedNodesForAMLaunches = StringUtils.join(
attempt.getAMBlacklist().getBlacklistUpdates().getAdditions(),
", ");
if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) { if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
AbstractYarnScheduler ayScheduler = AbstractYarnScheduler ayScheduler =
(AbstractYarnScheduler) rm.getResourceScheduler(); (AbstractYarnScheduler) rm.getResourceScheduler();

View File

@ -1643,7 +1643,7 @@ public void verifyAppAttemptsInfo(JSONObject info, RMAppAttempt appAttempt,
String user) String user)
throws JSONException, Exception { throws JSONException, Exception {
assertEquals("incorrect number of elements", 9, info.length()); assertEquals("incorrect number of elements", 10, info.length());
verifyAppAttemptInfoGeneric(appAttempt, info.getInt("id"), verifyAppAttemptInfoGeneric(appAttempt, info.getInt("id"),
info.getLong("startTime"), info.getString("containerId"), info.getLong("startTime"), info.getString("containerId"),