YARN-4307. Display blacklisted nodes for AM container in the RM web UI. Contributed by Naganarasimha G R.
(cherry picked from commit 308d63f382)
This commit is contained in:
parent
dfad608b22
commit
99966b1912
|
@ -64,6 +64,9 @@ Release 2.9.0 - UNRELEASED
|
|||
ApplicationSubmissionContextInfo more consistent.
|
||||
(Xuan Gong via vvasudev)
|
||||
|
||||
YARN-4307. Display blacklisted nodes for AM container in the RM web UI.
|
||||
(Naganarasimha G R via vvasudev)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -18,10 +18,13 @@
|
|||
|
||||
// Generated by HamletGen. Do NOT edit!
|
||||
package org.apache.hadoop.yarn.webapp.hamlet;
|
||||
import static java.util.EnumSet.of;
|
||||
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.ENDTAG;
|
||||
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.INLINE;
|
||||
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.PRE;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.util.EnumSet;
|
||||
import static java.util.EnumSet.*;
|
||||
import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.*;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.yarn.webapp.SubView;
|
||||
|
@ -2405,6 +2408,10 @@ public class Hamlet extends HamletImpl implements HamletSpec._Html {
|
|||
return setSelector(th(), selector)._(cdata)._();
|
||||
}
|
||||
|
||||
/**
 * Opens a {@code th} element via {@code th()}, applies the selector, title
 * and content to it, and closes it again in a single call.
 *
 * @param selector the selector handed to {@code setSelector} for the element
 * @param title the value handed to the element's {@code $title} setter
 * @param cdata the content handed to the element's {@code _(String)} method
 * @return the result of closing the element with {@code _()}
 */
public TR<T> th(String selector, String title, String cdata) {
  return setSelector(th(), selector)
      .$title(title)
      ._(cdata)
      ._();
}
|
||||
|
||||
@Override
|
||||
public TD<TR<T>> td() {
|
||||
closeAttrs();
|
||||
|
|
|
@ -72,11 +72,10 @@ public class SimpleBlacklistManager implements BlacklistManager {
|
|||
}
|
||||
ret = new BlacklistUpdates(blacklist, EMPTY_LIST);
|
||||
} else {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("blacklist size " + currentBlacklistSize + " is more than " +
|
||||
"failure threshold ratio " + blacklistDisableFailureThreshold +
|
||||
" out of total usable nodes " + numberOfNodeManagerHosts);
|
||||
}
|
||||
LOG.warn("Ignoring Blacklists, blacklist size " + currentBlacklistSize
|
||||
+ " is more than failure threshold ratio "
|
||||
+ blacklistDisableFailureThreshold + " out of total usable nodes "
|
||||
+ numberOfNodeManagerHosts);
|
||||
ret = new BlacklistUpdates(EMPTY_LIST, blacklist);
|
||||
}
|
||||
return ret;
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
import javax.crypto.SecretKey;
|
||||
|
@ -252,4 +253,9 @@ public interface RMAppAttempt extends EventHandler<RMAppAttemptEvent> {
|
|||
* @param amLaunchDiagnostics
|
||||
*/
|
||||
void updateAMLaunchDiagnostics(String amLaunchDiagnostics);
|
||||
|
||||
/**
|
||||
* @return Set of nodes which are blacklisted by the application
|
||||
*/
|
||||
Set<String> getBlacklistedNodes();
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
@ -91,7 +92,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||
|
@ -2142,4 +2145,18 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
public void setRecoveredFinalState(RMAppAttemptState finalState) {
|
||||
this.recoveredFinalState = finalState;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getBlacklistedNodes() {
|
||||
if (scheduler instanceof AbstractYarnScheduler) {
|
||||
AbstractYarnScheduler ayScheduler =
|
||||
(AbstractYarnScheduler) scheduler;
|
||||
SchedulerApplicationAttempt attempt =
|
||||
ayScheduler.getApplicationAttempt(applicationAttemptId);
|
||||
if (attempt != null) {
|
||||
return attempt.getBlacklistedNodes();
|
||||
}
|
||||
}
|
||||
return Collections.EMPTY_SET;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,9 +23,11 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
|
|||
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD;
|
||||
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerReport;
|
||||
|
@ -36,8 +38,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
|
||||
import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock;
|
||||
import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo;
|
||||
|
@ -48,11 +48,8 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
|
|||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
|
||||
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
|
||||
import org.apache.hadoop.yarn.webapp.view.InfoBlock;
|
||||
import com.google.inject.Inject;
|
||||
import java.util.List;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
import com.google.inject.Inject;
|
||||
|
||||
public class RMAppAttemptBlock extends AppAttemptBlock{
|
||||
|
||||
|
@ -207,14 +204,13 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
|
|||
Collection<ContainerReport> containers, AppAttemptInfo appAttempt,
|
||||
String node) {
|
||||
|
||||
String blacklistedNodes = "-";
|
||||
Set<String> nodes =
|
||||
getBlacklistedNodes(rm, getRMAppAttempt().getAppAttemptId());
|
||||
if (nodes != null) {
|
||||
if (!nodes.isEmpty()) {
|
||||
blacklistedNodes = StringUtils.join(nodes, ", ");
|
||||
}
|
||||
}
|
||||
RMAppAttempt rmAppAttempt = getRMAppAttempt();
|
||||
// nodes which are blacklisted by the application
|
||||
String appBlacklistedNodes =
|
||||
getNodeString(rmAppAttempt.getBlacklistedNodes());
|
||||
// nodes which are blacklisted by the RM for AM launches
|
||||
String rmBlackListedNodes = getNodeString(
|
||||
rmAppAttempt.getAMBlacklist().getBlacklistUpdates().getAdditions());
|
||||
|
||||
info("Application Attempt Overview")
|
||||
._(
|
||||
|
@ -248,21 +244,17 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
|
|||
._(
|
||||
"Diagnostics Info:",
|
||||
appAttempt.getDiagnosticsInfo() == null ? "" : appAttempt
|
||||
.getDiagnosticsInfo())._("Blacklisted Nodes:", blacklistedNodes);
|
||||
.getDiagnosticsInfo())
|
||||
._("Application Blacklisted Nodes:", appBlacklistedNodes)
|
||||
._("RM Blacklisted Nodes(for AM launches)", rmBlackListedNodes);
|
||||
}
|
||||
|
||||
public static Set<String> getBlacklistedNodes(ResourceManager rm,
|
||||
ApplicationAttemptId appid) {
|
||||
if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
|
||||
AbstractYarnScheduler ayScheduler =
|
||||
(AbstractYarnScheduler) rm.getResourceScheduler();
|
||||
SchedulerApplicationAttempt attempt =
|
||||
ayScheduler.getApplicationAttempt(appid);
|
||||
if (attempt != null) {
|
||||
return attempt.getBlacklistedNodes();
|
||||
}
|
||||
private String getNodeString(Collection<String> nodes) {
|
||||
String concatinatedString = "-";
|
||||
if (null != nodes && !nodes.isEmpty()) {
|
||||
concatinatedString = StringUtils.join(nodes, ", ");
|
||||
}
|
||||
return null;
|
||||
return concatinatedString;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,9 +20,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp;
|
|||
|
||||
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringEscapeUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
|
||||
|
@ -43,12 +44,8 @@ import org.apache.hadoop.yarn.webapp.view.InfoBlock;
|
|||
|
||||
import com.google.inject.Inject;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
public class RMAppBlock extends AppBlock{
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(RMAppBlock.class);
|
||||
private final ResourceManager rm;
|
||||
private final Configuration conf;
|
||||
|
||||
|
@ -116,7 +113,10 @@ public class RMAppBlock extends AppBlock{
|
|||
Hamlet.TBODY<Hamlet.TABLE<Hamlet>> tbody =
|
||||
html.table("#attempts").thead().tr().th(".id", "Attempt ID")
|
||||
.th(".started", "Started").th(".node", "Node").th(".logs", "Logs")
|
||||
.th(".blacklistednodes", "Blacklisted Nodes")._()._().tbody();
|
||||
.th(".appBlacklistednodes", "Nodes black listed by the application",
|
||||
"App Blacklisted Nodes")
|
||||
.th(".rmBlacklistednodes", "Nodes black listed by the RM for the"
|
||||
+ " app", "RM Blacklisted Nodes")._()._().tbody();
|
||||
|
||||
RMApp rmApp = this.rm.getRMContext().getRMApps().get(this.appID);
|
||||
if (rmApp == null) {
|
||||
|
@ -132,13 +132,12 @@ public class RMAppBlock extends AppBlock{
|
|||
AppAttemptInfo attemptInfo =
|
||||
new AppAttemptInfo(this.rm, rmAppAttempt, rmApp.getUser(),
|
||||
WebAppUtils.getHttpSchemePrefix(conf));
|
||||
String blacklistedNodesCount = "N/A";
|
||||
Set<String> nodes =
|
||||
RMAppAttemptBlock.getBlacklistedNodes(rm,
|
||||
rmAppAttempt.getAppAttemptId());
|
||||
if(nodes != null) {
|
||||
blacklistedNodesCount = String.valueOf(nodes.size());
|
||||
}
|
||||
Set<String> nodes = rmAppAttempt.getBlacklistedNodes();
|
||||
// nodes which are blacklisted by the application
|
||||
String appBlacklistedNodesCount = String.valueOf(nodes.size());
|
||||
// nodes which are blacklisted by the RM for AM launches
|
||||
String rmBlacklistedNodesCount = String.valueOf(rmAppAttempt
|
||||
.getAMBlacklist().getBlacklistUpdates().getAdditions().size());
|
||||
String nodeLink = attemptInfo.getNodeHttpAddress();
|
||||
if (nodeLink != null) {
|
||||
nodeLink = WebAppUtils.getHttpSchemePrefix(conf) + nodeLink;
|
||||
|
@ -158,8 +157,9 @@ public class RMAppBlock extends AppBlock{
|
|||
.escapeJavaScript(StringEscapeUtils.escapeHtml(nodeLink)))
|
||||
.append("</a>\",\"<a ")
|
||||
.append(logsLink == null ? "#" : "href='" + logsLink).append("'>")
|
||||
.append(logsLink == null ? "N/A" : "Logs").append("</a>\",").append(
|
||||
"\"").append(blacklistedNodesCount).append("\"],\n");
|
||||
.append(logsLink == null ? "N/A" : "Logs").append("</a>\",")
|
||||
.append("\"").append(appBlacklistedNodesCount).append("\",")
|
||||
.append("\"").append(rmBlacklistedNodesCount).append("\"],\n");
|
||||
}
|
||||
if (attemptsTableData.charAt(attemptsTableData.length() - 2) == ',') {
|
||||
attemptsTableData.delete(attemptsTableData.length() - 2,
|
||||
|
|
|
@ -95,8 +95,9 @@ public class RMAppsBlock extends AppsBlock {
|
|||
}
|
||||
|
||||
String blacklistedNodesCount = "N/A";
|
||||
Set<String> nodes =
|
||||
RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId);
|
||||
Set<String> nodes = rm.getRMContext().getRMApps()
|
||||
.get(appAttemptId.getApplicationId()).getAppAttempts()
|
||||
.get(appAttemptId).getBlacklistedNodes();
|
||||
if (nodes != null) {
|
||||
blacklistedNodesCount = String.valueOf(nodes.size());
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ public class AppAttemptInfo {
|
|||
protected String nodeId;
|
||||
protected String logsLink;
|
||||
protected String blacklistedNodes;
|
||||
protected String rmBlacklistedNodesForAMLaunches;
|
||||
protected String appAttemptId;
|
||||
|
||||
public AppAttemptInfo() {
|
||||
|
@ -67,6 +68,10 @@ public class AppAttemptInfo {
|
|||
this.logsLink = WebAppUtils.getRunningLogURL(schemePrefix
|
||||
+ masterContainer.getNodeHttpAddress(),
|
||||
ConverterUtils.toString(masterContainer.getId()), user);
|
||||
|
||||
rmBlacklistedNodesForAMLaunches = StringUtils.join(
|
||||
attempt.getAMBlacklist().getBlacklistUpdates().getAdditions(),
|
||||
", ");
|
||||
if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
|
||||
AbstractYarnScheduler ayScheduler =
|
||||
(AbstractYarnScheduler) rm.getResourceScheduler();
|
||||
|
|
|
@ -1643,7 +1643,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
|
|||
String user)
|
||||
throws JSONException, Exception {
|
||||
|
||||
assertEquals("incorrect number of elements", 9, info.length());
|
||||
assertEquals("incorrect number of elements", 10, info.length());
|
||||
|
||||
verifyAppAttemptInfoGeneric(appAttempt, info.getInt("id"),
|
||||
info.getLong("startTime"), info.getString("containerId"),
|
||||
|
|
Loading…
Reference in New Issue