YARN-4307. Display blacklisted nodes for AM container in the RM web UI. Contributed by Naganarasimha G R.

(cherry picked from commit 308d63f382)
This commit is contained in:
Varun Vasudev 2016-02-04 13:32:54 +05:30
parent dfad608b22
commit 99966b1912
10 changed files with 83 additions and 53 deletions

View File

@ -64,6 +64,9 @@ Release 2.9.0 - UNRELEASED
ApplicationSubmissionContextInfo more consistent.
(Xuan Gong via vvasudev)
YARN-4307. Display blacklisted nodes for AM container in the RM web UI.
(Naganarasimha G R via vvasudev)
OPTIMIZATIONS
BUG FIXES

View File

@ -18,10 +18,13 @@
// Generated by HamletGen. Do NOT edit!
package org.apache.hadoop.yarn.webapp.hamlet;
import static java.util.EnumSet.of;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.ENDTAG;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.INLINE;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.PRE;
import java.io.PrintWriter;
import java.util.EnumSet;
import static java.util.EnumSet.*;
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.*;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.yarn.webapp.SubView;
@ -2405,6 +2408,10 @@ public class Hamlet extends HamletImpl implements HamletSpec._Html {
return setSelector(th(), selector)._(cdata)._();
}
/**
 * Adds a header cell to this row, applying a selector and a title to the
 * cell before writing its text content.
 *
 * @param selector the selector passed to {@code setSelector} for the new cell
 * @param title the value passed to the cell's {@code $title} attribute setter
 * @param cdata the text content written into the cell
 * @return the row, so further cells can be chained
 */
public TR<T> th(String selector, String title, String cdata) {
  TH<TR<T>> headerCell = setSelector(th(), selector);
  return headerCell.$title(title)._(cdata)._();
}
@Override
public TD<TR<T>> td() {
closeAttrs();

View File

@ -72,11 +72,10 @@ public class SimpleBlacklistManager implements BlacklistManager {
}
ret = new BlacklistUpdates(blacklist, EMPTY_LIST);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("blacklist size " + currentBlacklistSize + " is more than " +
"failure threshold ratio " + blacklistDisableFailureThreshold +
" out of total usable nodes " + numberOfNodeManagerHosts);
}
LOG.warn("Ignoring Blacklists, blacklist size " + currentBlacklistSize
+ " is more than failure threshold ratio "
+ blacklistDisableFailureThreshold + " out of total usable nodes "
+ numberOfNodeManagerHosts);
ret = new BlacklistUpdates(EMPTY_LIST, blacklist);
}
return ret;

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import javax.crypto.SecretKey;
@ -252,4 +253,9 @@ public interface RMAppAttempt extends EventHandler<RMAppAttemptEvent> {
* @param amLaunchDiagnostics
*/
void updateAMLaunchDiagnostics(String amLaunchDiagnostics);
/**
 * Gets the nodes that the application has blacklisted for this attempt.
 *
 * @return Set of nodes which are blacklisted by the application
 */
Set<String> getBlacklistedNodes();
}

View File

@ -28,6 +28,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@ -91,7 +92,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@ -2142,4 +2145,18 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
public void setRecoveredFinalState(RMAppAttemptState finalState) {
this.recoveredFinalState = finalState;
}
/**
 * Returns the nodes blacklisted by the application, as reported by the
 * scheduler's record of this attempt.
 *
 * @return the blacklisted nodes reported by the scheduler attempt, or an
 *         empty set when the scheduler is not an
 *         {@link AbstractYarnScheduler} or it has no attempt registered
 *         under this attempt id
 */
@Override
public Set<String> getBlacklistedNodes() {
  if (!(scheduler instanceof AbstractYarnScheduler)) {
    // Only AbstractYarnScheduler exposes the per-attempt lookup used below.
    return Collections.<String>emptySet();
  }
  AbstractYarnScheduler ayScheduler = (AbstractYarnScheduler) scheduler;
  SchedulerApplicationAttempt attempt =
      ayScheduler.getApplicationAttempt(applicationAttemptId);
  if (attempt == null) {
    return Collections.<String>emptySet();
  }
  return attempt.getBlacklistedNodes();
}
}

View File

@ -23,9 +23,11 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH;
import java.util.Collection;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerReport;
@ -36,8 +38,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock;
import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo;
@ -48,11 +48,8 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.apache.hadoop.yarn.webapp.view.InfoBlock;
import com.google.inject.Inject;
import java.util.List;
import java.util.Collection;
import java.util.Set;
import com.google.inject.Inject;
public class RMAppAttemptBlock extends AppAttemptBlock{
@ -207,14 +204,13 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
Collection<ContainerReport> containers, AppAttemptInfo appAttempt,
String node) {
String blacklistedNodes = "-";
Set<String> nodes =
getBlacklistedNodes(rm, getRMAppAttempt().getAppAttemptId());
if (nodes != null) {
if (!nodes.isEmpty()) {
blacklistedNodes = StringUtils.join(nodes, ", ");
}
}
RMAppAttempt rmAppAttempt = getRMAppAttempt();
// nodes which are blacklisted by the application
String appBlacklistedNodes =
getNodeString(rmAppAttempt.getBlacklistedNodes());
// nodes which are blacklisted by the RM for AM launches
String rmBlackListedNodes = getNodeString(
rmAppAttempt.getAMBlacklist().getBlacklistUpdates().getAdditions());
info("Application Attempt Overview")
._(
@ -248,21 +244,17 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
._(
"Diagnostics Info:",
appAttempt.getDiagnosticsInfo() == null ? "" : appAttempt
.getDiagnosticsInfo())._("Blacklisted Nodes:", blacklistedNodes);
.getDiagnosticsInfo())
._("Application Blacklisted Nodes:", appBlacklistedNodes)
._("RM Blacklisted Nodes(for AM launches)", rmBlackListedNodes);
}
public static Set<String> getBlacklistedNodes(ResourceManager rm,
ApplicationAttemptId appid) {
if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
AbstractYarnScheduler ayScheduler =
(AbstractYarnScheduler) rm.getResourceScheduler();
SchedulerApplicationAttempt attempt =
ayScheduler.getApplicationAttempt(appid);
if (attempt != null) {
return attempt.getBlacklistedNodes();
}
private String getNodeString(Collection<String> nodes) {
String concatinatedString = "-";
if (null != nodes && !nodes.isEmpty()) {
concatinatedString = StringUtils.join(nodes, ", ");
}
return null;
return concatinatedString;
}
@Override

View File

@ -20,9 +20,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
import java.util.Collection;
import java.util.Set;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
@ -43,12 +44,8 @@ import org.apache.hadoop.yarn.webapp.view.InfoBlock;
import com.google.inject.Inject;
import java.util.Collection;
import java.util.Set;
public class RMAppBlock extends AppBlock{
private static final Log LOG = LogFactory.getLog(RMAppBlock.class);
private final ResourceManager rm;
private final Configuration conf;
@ -116,7 +113,10 @@ public class RMAppBlock extends AppBlock{
Hamlet.TBODY<Hamlet.TABLE<Hamlet>> tbody =
html.table("#attempts").thead().tr().th(".id", "Attempt ID")
.th(".started", "Started").th(".node", "Node").th(".logs", "Logs")
.th(".blacklistednodes", "Blacklisted Nodes")._()._().tbody();
.th(".appBlacklistednodes", "Nodes black listed by the application",
"App Blacklisted Nodes")
.th(".rmBlacklistednodes", "Nodes black listed by the RM for the"
+ " app", "RM Blacklisted Nodes")._()._().tbody();
RMApp rmApp = this.rm.getRMContext().getRMApps().get(this.appID);
if (rmApp == null) {
@ -132,13 +132,12 @@ public class RMAppBlock extends AppBlock{
AppAttemptInfo attemptInfo =
new AppAttemptInfo(this.rm, rmAppAttempt, rmApp.getUser(),
WebAppUtils.getHttpSchemePrefix(conf));
String blacklistedNodesCount = "N/A";
Set<String> nodes =
RMAppAttemptBlock.getBlacklistedNodes(rm,
rmAppAttempt.getAppAttemptId());
if(nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size());
}
Set<String> nodes = rmAppAttempt.getBlacklistedNodes();
// nodes which are blacklisted by the application
String appBlacklistedNodesCount = String.valueOf(nodes.size());
// nodes which are blacklisted by the RM for AM launches
String rmBlacklistedNodesCount = String.valueOf(rmAppAttempt
.getAMBlacklist().getBlacklistUpdates().getAdditions().size());
String nodeLink = attemptInfo.getNodeHttpAddress();
if (nodeLink != null) {
nodeLink = WebAppUtils.getHttpSchemePrefix(conf) + nodeLink;
@ -158,8 +157,9 @@ public class RMAppBlock extends AppBlock{
.escapeJavaScript(StringEscapeUtils.escapeHtml(nodeLink)))
.append("</a>\",\"<a ")
.append(logsLink == null ? "#" : "href='" + logsLink).append("'>")
.append(logsLink == null ? "N/A" : "Logs").append("</a>\",").append(
"\"").append(blacklistedNodesCount).append("\"],\n");
.append(logsLink == null ? "N/A" : "Logs").append("</a>\",")
.append("\"").append(appBlacklistedNodesCount).append("\",")
.append("\"").append(rmBlacklistedNodesCount).append("\"],\n");
}
if (attemptsTableData.charAt(attemptsTableData.length() - 2) == ',') {
attemptsTableData.delete(attemptsTableData.length() - 2,

View File

@ -95,8 +95,9 @@ public class RMAppsBlock extends AppsBlock {
}
String blacklistedNodesCount = "N/A";
Set<String> nodes =
RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId);
Set<String> nodes = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId()).getAppAttempts()
.get(appAttemptId).getBlacklistedNodes();
if (nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size());
}

View File

@ -42,6 +42,7 @@ public class AppAttemptInfo {
protected String nodeId;
protected String logsLink;
protected String blacklistedNodes;
protected String rmBlacklistedNodesForAMLaunches;
protected String appAttemptId;
public AppAttemptInfo() {
@ -67,6 +68,10 @@ public class AppAttemptInfo {
this.logsLink = WebAppUtils.getRunningLogURL(schemePrefix
+ masterContainer.getNodeHttpAddress(),
ConverterUtils.toString(masterContainer.getId()), user);
rmBlacklistedNodesForAMLaunches = StringUtils.join(
attempt.getAMBlacklist().getBlacklistUpdates().getAdditions(),
", ");
if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
AbstractYarnScheduler ayScheduler =
(AbstractYarnScheduler) rm.getResourceScheduler();

View File

@ -1643,7 +1643,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
String user)
throws JSONException, Exception {
assertEquals("incorrect number of elements", 9, info.length());
assertEquals("incorrect number of elements", 10, info.length());
verifyAppAttemptInfoGeneric(appAttempt, info.getInt("id"),
info.getLong("startTime"), info.getString("containerId"),