YARN-9567. Add diagnostics for outstanding resource requests on app attempts page. Contributed by Tao Yang.
This commit is contained in:
parent
d273f6a2b4
commit
b9d825f178
|
@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
|
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceRequestInfo;
|
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceRequestInfo;
|
||||||
import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock;
|
import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock;
|
||||||
|
@ -62,6 +63,228 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
|
||||||
|
|
||||||
private final ResourceManager rm;
|
private final ResourceManager rm;
|
||||||
protected Configuration conf;
|
protected Configuration conf;
|
||||||
|
private final static String DIAGNOSTICS_SCRIPT_BODY = new StringBuilder()
|
||||||
|
.append("var refresh = false;")
|
||||||
|
.append("var haveGotAppDiagnostic = false;")
|
||||||
|
.append("function getArray(objOrArray) {")
|
||||||
|
.append(" if(Array.isArray(objOrArray)){")
|
||||||
|
.append(" return objOrArray;")
|
||||||
|
.append(" } else {")
|
||||||
|
.append(" return [objOrArray];")
|
||||||
|
.append(" }")
|
||||||
|
.append("}")
|
||||||
|
.append("function getTableDataFromObjectArray(objectArray, fields) {")
|
||||||
|
.append(" if (objectArray == undefined) {")
|
||||||
|
.append(" return [];")
|
||||||
|
.append(" }")
|
||||||
|
.append(" var data = [];")
|
||||||
|
.append(" $.each(objectArray, function(i, object) {")
|
||||||
|
.append(" var dataItem = [];")
|
||||||
|
.append(" for (var i=0; i<fields.length; i++) {")
|
||||||
|
.append(" dataItem.push(object[fields[i]] != undefined ? ")
|
||||||
|
.append("object[fields[i]] : '');")
|
||||||
|
.append(" }")
|
||||||
|
.append(" data.push(dataItem);")
|
||||||
|
.append(" });")
|
||||||
|
.append(" return data;")
|
||||||
|
.append("}")
|
||||||
|
.append("function sendRequest(requestURL, doneCallBackFunc,")
|
||||||
|
.append(" failCallBackFunc) {")
|
||||||
|
.append(" $.ajax({")
|
||||||
|
.append(" type: 'GET',")
|
||||||
|
.append(" url: requestURL,")
|
||||||
|
.append(" contentType: 'text/plain'")
|
||||||
|
.append(" }).done(doneCallBackFunc).fail(failCallBackFunc);")
|
||||||
|
.append("}")
|
||||||
|
.append("function parseQueryAppActResponse(responseObj) {")
|
||||||
|
.append(" var requestDiagnostics = [], dateTime, appDiagnostic;")
|
||||||
|
.append(" responseObj = responseObj['appActivities'];")
|
||||||
|
.append(" if(responseObj.hasOwnProperty('allocations')){")
|
||||||
|
.append(" var allocations = getArray(responseObj['allocations']);")
|
||||||
|
.append(" var allocation = allocations[0];")
|
||||||
|
.append(" dateTime = allocation['dateTime'];")
|
||||||
|
.append(" if(allocation.hasOwnProperty('diagnostic')){")
|
||||||
|
.append(" appDiagnostic = allocation['diagnostic'];")
|
||||||
|
.append(" }")
|
||||||
|
.append(" if(allocation.hasOwnProperty('children')){")
|
||||||
|
.append(" var requestAllocations = ")
|
||||||
|
.append(" getArray(allocation['children']);")
|
||||||
|
.append(" $.each(requestAllocations, function (i, requestAllocation) {")
|
||||||
|
.append(" if(requestAllocation.hasOwnProperty('children')){")
|
||||||
|
.append(" var allocationAttempts = ")
|
||||||
|
.append(" getArray(requestAllocation['children']);")
|
||||||
|
.append(" var diagnosticSummary = requestAllocation.hasOwnProperty(")
|
||||||
|
.append(" 'diagnostic')? requestAllocation['diagnostic']+'. ':'';")
|
||||||
|
.append(" $.each(allocationAttempts, function(i,allocationAttempt) {")
|
||||||
|
.append(" if (i > 0) {")
|
||||||
|
.append(" diagnosticSummary += ', ';")
|
||||||
|
.append(" }")
|
||||||
|
.append(" diagnosticSummary += allocationAttempt['count'] + ' ' +")
|
||||||
|
.append(" (allocationAttempt['count']=='1' ? 'node ' : 'nodes ')")
|
||||||
|
.append(" + allocationAttempt['allocationState'];")
|
||||||
|
.append(" if (allocationAttempt.hasOwnProperty('diagnostic')) {")
|
||||||
|
.append(" diagnosticSummary += ' with diagnostic:' + ")
|
||||||
|
.append(" allocationAttempt['diagnostic'];")
|
||||||
|
.append(" }")
|
||||||
|
.append(" });")
|
||||||
|
.append(" delete requestAllocation['children'];")
|
||||||
|
.append(" requestAllocation['diagnostic']=diagnosticSummary;")
|
||||||
|
.append(" }")
|
||||||
|
.append(" requestDiagnostics.push(requestAllocation);")
|
||||||
|
.append(" });")
|
||||||
|
.append(" }")
|
||||||
|
.append(" }")
|
||||||
|
.append(" return [requestDiagnostics, dateTime, appDiagnostic];")
|
||||||
|
.append("}")
|
||||||
|
.append("function handleQueryAppActDone(data){")
|
||||||
|
.append(" console.log('App activities:', data);")
|
||||||
|
.append(" var results = parseQueryAppActResponse(data);")
|
||||||
|
.append(" console.log('parsed results:', results);")
|
||||||
|
.append(" var requestDiagnostics = results[0];")
|
||||||
|
.append(" var dateTime = results[1];")
|
||||||
|
.append(" var appDiagnostic = results[2];")
|
||||||
|
.append(" haveGotAppDiagnostic = false;")
|
||||||
|
.append(" if (appDiagnostic != undefined) {")
|
||||||
|
.append(" haveGotAppDiagnostic = true;")
|
||||||
|
.append(" $('#appDiagnostic').html('App diagnostic: '")
|
||||||
|
.append(" + appDiagnostic);")
|
||||||
|
.append(" }")
|
||||||
|
.append(" $('#diagnosticsTableDiv').empty();")
|
||||||
|
.append(" if (requestDiagnostics.length > 0) {")
|
||||||
|
.append(" haveGotAppDiagnostic = true;")
|
||||||
|
.append(" tableData = getTableDataFromObjectArray(requestDiagnostics,")
|
||||||
|
.append(" ['requestPriority', 'allocationRequestId', 'allocationState',")
|
||||||
|
.append(" 'diagnostic']);")
|
||||||
|
.append(" $('#diagnosticsTableDiv').append('<table ")
|
||||||
|
.append(" id=\"diagnosticsTable\"></table>');")
|
||||||
|
.append(" $('#diagnosticsTable').dataTable( {")
|
||||||
|
.append(" 'aaData': tableData,")
|
||||||
|
.append(" 'aoColumns': [")
|
||||||
|
.append(" { 'sTitle': 'Priority' },")
|
||||||
|
.append(" { 'sTitle': 'AllocationRequestId' },")
|
||||||
|
.append(" { 'sTitle': 'AllocationState' },")
|
||||||
|
.append(" { 'sTitle': 'DiagnosticSummary' }")
|
||||||
|
.append(" ],")
|
||||||
|
.append(" bJQueryUI:true, sPaginationType: 'full_numbers',")
|
||||||
|
.append(" iDisplayLength:20, aLengthMenu:[20, 40, 60, 80, 100]")
|
||||||
|
.append(" });")
|
||||||
|
.append(" }")
|
||||||
|
.append(" if (haveGotAppDiagnostic) {")
|
||||||
|
.append(" $('#refreshDiagnosticsBtn').attr('disabled',false);")
|
||||||
|
.append(" $('#diagnosticsUpdateTime').html('Updated at ' + dateTime);")
|
||||||
|
.append(" } else {")
|
||||||
|
.append(" $('#diagnosticsUpdateTime').html('');")
|
||||||
|
.append(" if (refresh) {")
|
||||||
|
.append(" refreshSchedulerDiagnostics();")
|
||||||
|
.append(" }")
|
||||||
|
.append(" }")
|
||||||
|
.append("}")
|
||||||
|
.append("function handleRequestFailure(data){")
|
||||||
|
.append(" alert('Request app activities REST API failed.');")
|
||||||
|
.append(" console.log(data);")
|
||||||
|
.append("}")
|
||||||
|
.append("function handleRefreshAppActDone(data){")
|
||||||
|
.append(" refresh = true;")
|
||||||
|
.append(" setTimeout(function(){")
|
||||||
|
.append(" queryAppDiagnostics();")
|
||||||
|
.append(" }, 2000);")
|
||||||
|
.append("}")
|
||||||
|
.append("function refreshAppDiagnostics() {")
|
||||||
|
.append(" $('#refreshDiagnosticsBtn').attr('disabled',true);")
|
||||||
|
.append(" sendRequest(refreshAppActivitiesURL,handleRefreshAppActDone,")
|
||||||
|
.append(" handleRequestFailure);")
|
||||||
|
.append("}")
|
||||||
|
.append("function queryAppDiagnostics() {")
|
||||||
|
.append(" sendRequest(getAppActivitiesURL,handleQueryAppActDone,")
|
||||||
|
.append(" handleRequestFailure);")
|
||||||
|
.append("}")
|
||||||
|
.append("function parseHierarchicalQueue(node, nodeInfos,")
|
||||||
|
.append(" parentNodePath, partition) {")
|
||||||
|
.append(" var nodePath = parentNodePath + '/' + node['name'];")
|
||||||
|
.append(" if (node.hasOwnProperty('name')){")
|
||||||
|
.append(" if (node['diagnostic'] == undefined || node['diagnostic']")
|
||||||
|
.append(" .indexOf(ignoreActivityContent) == -1){")
|
||||||
|
.append(" var nodeInfo = {};")
|
||||||
|
.append(" nodeInfo['partition'] = partition;")
|
||||||
|
.append(" nodeInfo['path'] = nodePath;")
|
||||||
|
.append(" nodeInfo['allocationState'] = node['allocationState'];")
|
||||||
|
.append(" nodeInfo['diagnostic'] = ")
|
||||||
|
.append(" node['diagnostic'] == undefined ? '':node['diagnostic'];")
|
||||||
|
.append(" nodeInfos.push(nodeInfo);")
|
||||||
|
.append(" }")
|
||||||
|
.append(" }")
|
||||||
|
.append(" if (node.hasOwnProperty('children')){")
|
||||||
|
.append(" var children = getArray(node['children']);")
|
||||||
|
.append(" $.each(children, function (i, child) {")
|
||||||
|
.append(" parseHierarchicalQueue(child, nodeInfos, nodePath,")
|
||||||
|
.append(" partition);")
|
||||||
|
.append(" });")
|
||||||
|
.append(" }")
|
||||||
|
.append("}")
|
||||||
|
.append("function parseQueueDiagnostics(data) {")
|
||||||
|
.append(" var nodeInfos = [], dateTime;")
|
||||||
|
.append(" if(data.hasOwnProperty('dateTime')){")
|
||||||
|
.append(" dateTime = data['dateTime'];")
|
||||||
|
.append(" }")
|
||||||
|
.append(" if(data.hasOwnProperty('allocations')){")
|
||||||
|
.append(" var allocations = getArray(data['allocations']);")
|
||||||
|
.append(" $.each(allocations, function (i, allocation) {")
|
||||||
|
.append(" if (allocation.hasOwnProperty('root')) {")
|
||||||
|
.append(" var root = allocation['root'];")
|
||||||
|
.append(" var partition = allocation['partition'];")
|
||||||
|
.append(" parseHierarchicalQueue(root, nodeInfos, '', partition);")
|
||||||
|
.append(" }")
|
||||||
|
.append(" });")
|
||||||
|
.append(" }")
|
||||||
|
.append(" return [nodeInfos, dateTime];")
|
||||||
|
.append("}")
|
||||||
|
.append("function handleRefreshSchedulerActDone(data){")
|
||||||
|
.append(" console.log('handleRefreshSchedulerActDone', data);")
|
||||||
|
.append(" setTimeout(function(){")
|
||||||
|
.append(" querySchedulerDiagnostics();")
|
||||||
|
.append(" $('#refreshDiagnosticsBtn').attr('disabled',false);")
|
||||||
|
.append(" }, 100);")
|
||||||
|
.append("}")
|
||||||
|
.append("function handleQuerySchedulerActDone(data){")
|
||||||
|
.append(" console.log('Scheduler activities:', data);")
|
||||||
|
.append(" data = data['activities'];")
|
||||||
|
.append(" var results = parseQueueDiagnostics(data);")
|
||||||
|
.append(" var nodeInfos = results[0];")
|
||||||
|
.append(" var dateTime = results[1];")
|
||||||
|
.append(" $('#diagnosticsTableDiv').empty();")
|
||||||
|
.append(" if(dateTime != undefined){")
|
||||||
|
.append(" $('#diagnosticsUpdateTime').html('App diagnostics not found!")
|
||||||
|
.append(" Got useful scheduler activities updated at '+dateTime);")
|
||||||
|
.append(" tableData = getTableDataFromObjectArray(nodeInfos, ")
|
||||||
|
.append(" ['partition', 'path', 'allocationState', 'diagnostic']);")
|
||||||
|
.append(" $('#diagnosticsTableDiv').append('<table ")
|
||||||
|
.append(" id=\"diagnosticsTable\"></table>');")
|
||||||
|
.append(" $('#diagnosticsTable').dataTable({")
|
||||||
|
.append(" 'aaData': tableData,")
|
||||||
|
.append(" 'aoColumns': [")
|
||||||
|
.append(" { 'sTitle': 'Partition' },")
|
||||||
|
.append(" { 'sTitle': 'SchedulingNode' },")
|
||||||
|
.append(" { 'sTitle': 'AllocationState' },")
|
||||||
|
.append(" { 'sTitle': 'Diagnostic' }")
|
||||||
|
.append(" ],")
|
||||||
|
.append(" bJQueryUI:true, sPaginationType: 'full_numbers',")
|
||||||
|
.append(" iDisplayLength:20, aLengthMenu:[20, 40, 60, 80, 100]")
|
||||||
|
.append(" });")
|
||||||
|
.append(" } else if(data.hasOwnProperty('diagnostic')){")
|
||||||
|
.append(" $('#diagnosticsUpdateTime').html(data['diagnostic']);")
|
||||||
|
.append(" }")
|
||||||
|
.append(" $('#refreshDiagnosticsBtn').attr('disabled',false);")
|
||||||
|
.append("}")
|
||||||
|
.append("function refreshSchedulerDiagnostics() {")
|
||||||
|
.append(" $('#refreshDiagnosticsBtn').attr('disabled',true);")
|
||||||
|
.append(" sendRequest(schedulerActivitiesURL,")
|
||||||
|
.append(" handleRefreshSchedulerActDone,handleRequestFailure);")
|
||||||
|
.append("}")
|
||||||
|
.append("function querySchedulerDiagnostics() {")
|
||||||
|
.append(" sendRequest(schedulerActivitiesURL,")
|
||||||
|
.append(" handleQuerySchedulerActDone,handleRequestFailure);")
|
||||||
|
.append("}")
|
||||||
|
.append("queryAppDiagnostics();").toString();
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
RMAppAttemptBlock(ViewContext ctx, ResourceManager rm, Configuration conf) {
|
RMAppAttemptBlock(ViewContext ctx, ResourceManager rm, Configuration conf) {
|
||||||
|
@ -137,6 +360,10 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
|
||||||
html.script().$type("text/javascript")
|
html.script().$type("text/javascript")
|
||||||
.__("var resourceRequestsTableData=" + resourceRequestTableData).__();
|
.__("var resourceRequestsTableData=" + resourceRequestTableData).__();
|
||||||
tbody.__().__();
|
tbody.__().__();
|
||||||
|
|
||||||
|
// create diagnostics table when CS is enabled
|
||||||
|
createDiagnosticsTable(html, div);
|
||||||
|
|
||||||
div.__();
|
div.__();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,4 +538,52 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
|
||||||
return rm.getClientRMService().getApplicationAttemptReport(request)
|
return rm.getClientRMService().getApplicationAttemptReport(request)
|
||||||
.getApplicationAttemptReport();
|
.getApplicationAttemptReport();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void createDiagnosticsTable(Block html, DIV<Hamlet> parentDiv) {
|
||||||
|
if (!(rm.getResourceScheduler() instanceof CapacityScheduler)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
String appActivitiesURL =
|
||||||
|
new StringBuilder().append(RMWSConsts.RM_WEB_SERVICE_PATH).append(
|
||||||
|
RMWSConsts.SCHEDULER_APP_ACTIVITIES.replace("{appid}",
|
||||||
|
this.appAttemptId.getApplicationId().toString())).append("?")
|
||||||
|
.toString();
|
||||||
|
String refreshAppActivitiesURL = appActivitiesURL + "actions=refresh";
|
||||||
|
String getAppActivitiesURL = appActivitiesURL
|
||||||
|
+ "actions=get&groupBy=diagnostic&summarize=true";
|
||||||
|
String refreshAndGetAppActivitiesURL = appActivitiesURL
|
||||||
|
+ "actions=refresh&actions=get&groupBy=diagnostic&summarize=true";
|
||||||
|
String schedulerActivitiesURL =
|
||||||
|
new StringBuilder().append(RMWSConsts.RM_WEB_SERVICE_PATH)
|
||||||
|
.append(RMWSConsts.SCHEDULER_ACTIVITIES).append("?")
|
||||||
|
.append(RMWSConsts.GROUP_BY).append("=diagnostic").toString();
|
||||||
|
|
||||||
|
DIV<DIV<Hamlet>> div = parentDiv.div();
|
||||||
|
div.p().__("Diagnostics in cache ").__("(For more details refer to ")
|
||||||
|
.a(refreshAndGetAppActivitiesURL, "App Activities").__(" or ")
|
||||||
|
.a(schedulerActivitiesURL, "Scheduler Activities").__(")").__();
|
||||||
|
div.button().$id("refreshDiagnosticsBtn")
|
||||||
|
.$style("border-style: solid; border-color: #000000; border-width: 1px;"
|
||||||
|
+ " cursor: hand; cursor: pointer; border-radius: 4px")
|
||||||
|
.$onclick("refreshAppDiagnostics()").b("Refresh").__();
|
||||||
|
div.p().$id("diagnosticsUpdateTime").__();
|
||||||
|
div.p().$id("appDiagnostic").__();
|
||||||
|
div.div("#diagnosticsTableDiv").__();
|
||||||
|
|
||||||
|
div.__();
|
||||||
|
|
||||||
|
StringBuilder script = new StringBuilder();
|
||||||
|
script
|
||||||
|
.append("var refreshAppActivitiesURL = '")
|
||||||
|
.append(refreshAppActivitiesURL).append("';")
|
||||||
|
.append("var getAppActivitiesURL = '")
|
||||||
|
.append(getAppActivitiesURL).append("';")
|
||||||
|
.append("var schedulerActivitiesURL = '")
|
||||||
|
.append(schedulerActivitiesURL).append("';")
|
||||||
|
.append("var ignoreActivityContent = '")
|
||||||
|
.append("does not need more resource';")
|
||||||
|
.append(DIAGNOSTICS_SCRIPT_BODY);
|
||||||
|
|
||||||
|
html.script().$type("text/javascript").__(script.toString()).__();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue