YARN-1376. NM need to notify the log aggregation status to RM through Node heartbeat. Contributed by Xuan Gong.

This commit is contained in:
Junping Du 2015-04-10 08:56:18 -07:00
parent 83979e61a4
commit 92431c9617
26 changed files with 1291 additions and 11 deletions

View File

@ -60,6 +60,9 @@ Release 2.8.0 - UNRELEASED
container-executor for outbound network traffic control. (Sidharta Seethana
via vinodkv)
YARN-1376. NM need to notify the log aggregation status to RM through
heartbeat. (Xuan Gong via junping_du)
IMPROVEMENTS
YARN-1880. Cleanup TestApplicationClientProtocolOnHA

View File

@ -741,6 +741,17 @@ public class YarnConfiguration extends Configuration {
YARN_PREFIX + "log-aggregation.retain-check-interval-seconds";
public static final long DEFAULT_LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS = -1;
/**
* How long for ResourceManager to wait for NodeManager to report its
* log aggregation status. If waiting time of which the log aggregation status
* is reported from NodeManager exceeds the configured value, RM will report
* log aggregation status for this NodeManager as TIME_OUT
*/
public static final String LOG_AGGREGATION_STATUS_TIME_OUT_MS =
YARN_PREFIX + "log-aggregation-status.time-out.ms";
public static final long DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS
= 10 * 60 * 1000;
/**
* Number of seconds to retain logs on the NodeManager. Only applicable if Log
* aggregation is disabled

View File

@ -0,0 +1,104 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.api.protocolrecords;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
import org.apache.hadoop.yarn.util.Records;
/**
* {@code LogAggregationReport} is a report for log aggregation status
* in one NodeManager of an application.
* <p>
* It includes details such as:
* <ul>
* <li>{@link ApplicationId} of the application.</li>
* <li>{@link NodeId} of the NodeManager.</li>
* <li>{@link LogAggregationStatus}</li>
* <li>Diagnostic information</li>
* </ul>
*
*/
@Public
@Unstable
public abstract class LogAggregationReport {
@Public
@Unstable
public static LogAggregationReport newInstance(ApplicationId appId,
NodeId nodeId, LogAggregationStatus status, String diagnosticMessage) {
LogAggregationReport report = Records.newRecord(LogAggregationReport.class);
report.setApplicationId(appId);
report.setLogAggregationStatus(status);
report.setDiagnosticMessage(diagnosticMessage);
return report;
}
/**
* Get the <code>ApplicationId</code> of the application.
* @return <code>ApplicationId</code> of the application
*/
@Public
@Unstable
public abstract ApplicationId getApplicationId();
@Public
@Unstable
public abstract void setApplicationId(ApplicationId appId);
/**
* Get the <code>NodeId</code>.
* @return <code>NodeId</code>
*/
@Public
@Unstable
public abstract NodeId getNodeId();
@Public
@Unstable
public abstract void setNodeId(NodeId nodeId);
/**
* Get the <code>LogAggregationStatus</code>.
* @return <code>LogAggregationStatus</code>
*/
@Public
@Unstable
public abstract LogAggregationStatus getLogAggregationStatus();
@Public
@Unstable
public abstract void setLogAggregationStatus(
LogAggregationStatus logAggregationStatus);
/**
* Get the <em>diagnositic information</em> of this log aggregation
* @return <em>diagnositic information</em> of this log aggregation
*/
@Public
@Unstable
public abstract String getDiagnosticMessage();
@Public
@Unstable
public abstract void setDiagnosticMessage(String diagnosticMessage);
}

View File

@ -18,8 +18,10 @@
package org.apache.hadoop.yarn.server.api.protocolrecords;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.util.Records;
@ -51,4 +53,10 @@ public abstract class NodeHeartbeatRequest {
public abstract Set<String> getNodeLabels();
public abstract void setNodeLabels(Set<String> nodeLabels);
public abstract Map<ApplicationId, LogAggregationReport>
getLogAggregationReportsForApps();
public abstract void setLogAggregationReportsForApps(
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps);
}

View File

@ -0,0 +1,227 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.LogAggregationStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportProtoOrBuilder;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
import com.google.protobuf.TextFormat;
@Private
@Unstable
public class LogAggregationReportPBImpl extends LogAggregationReport {
LogAggregationReportProto proto = LogAggregationReportProto
.getDefaultInstance();
LogAggregationReportProto.Builder builder = null;
boolean viaProto = false;
private static final String LOGAGGREGATION_STATUS_PREFIX = "LOG_";
private ApplicationId applicationId;
private NodeId nodeId;
public LogAggregationReportPBImpl() {
builder = LogAggregationReportProto.newBuilder();
}
public LogAggregationReportPBImpl(LogAggregationReportProto proto) {
this.proto = proto;
viaProto = true;
}
public LogAggregationReportProto getProto() {
mergeLocalToProto();
proto = viaProto ? proto : builder.build();
viaProto = true;
return proto;
}
@Override
public int hashCode() {
return getProto().hashCode();
}
@Override
public boolean equals(Object other) {
if (other == null)
return false;
if (other.getClass().isAssignableFrom(this.getClass())) {
return this.getProto().equals(this.getClass().cast(other).getProto());
}
return false;
}
@Override
public String toString() {
return TextFormat.shortDebugString(getProto());
}
private void mergeLocalToBuilder() {
if (this.applicationId != null
&& !((ApplicationIdPBImpl) this.applicationId).getProto().equals(
builder.getApplicationId())) {
builder.setApplicationId(convertToProtoFormat(this.applicationId));
}
if (this.nodeId != null
&& !((NodeIdPBImpl) this.nodeId).getProto().equals(
builder.getNodeId())) {
builder.setNodeId(convertToProtoFormat(this.nodeId));
}
}
private void mergeLocalToProto() {
if (viaProto)
maybeInitBuilder();
mergeLocalToBuilder();
proto = builder.build();
viaProto = true;
}
private void maybeInitBuilder() {
if (viaProto || builder == null) {
builder = LogAggregationReportProto.newBuilder(proto);
}
viaProto = false;
}
@Override
public ApplicationId getApplicationId() {
if (this.applicationId != null) {
return this.applicationId;
}
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasApplicationId()) {
return null;
}
this.applicationId = convertFromProtoFormat(p.getApplicationId());
return this.applicationId;
}
@Override
public void setApplicationId(ApplicationId appId) {
maybeInitBuilder();
if (appId == null)
builder.clearApplicationId();
this.applicationId = appId;
}
private ApplicationIdProto convertToProtoFormat(ApplicationId t) {
return ((ApplicationIdPBImpl) t).getProto();
}
private ApplicationIdPBImpl convertFromProtoFormat(
ApplicationIdProto applicationId) {
return new ApplicationIdPBImpl(applicationId);
}
@Override
public LogAggregationStatus getLogAggregationStatus() {
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasLogAggregationStatus()) {
return null;
}
return convertFromProtoFormat(p.getLogAggregationStatus());
}
@Override
public void
setLogAggregationStatus(LogAggregationStatus logAggregationStatus) {
maybeInitBuilder();
if (logAggregationStatus == null) {
builder.clearLogAggregationStatus();
return;
}
builder.setLogAggregationStatus(convertToProtoFormat(logAggregationStatus));
}
private LogAggregationStatus convertFromProtoFormat(
LogAggregationStatusProto s) {
return LogAggregationStatus.valueOf(s.name().replace(
LOGAGGREGATION_STATUS_PREFIX, ""));
}
private LogAggregationStatusProto
convertToProtoFormat(LogAggregationStatus s) {
return LogAggregationStatusProto.valueOf(LOGAGGREGATION_STATUS_PREFIX
+ s.name());
}
@Override
public String getDiagnosticMessage() {
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasDiagnostics()) {
return null;
}
return p.getDiagnostics();
}
@Override
public void setDiagnosticMessage(String diagnosticMessage) {
maybeInitBuilder();
if (diagnosticMessage == null) {
builder.clearDiagnostics();
return;
}
builder.setDiagnostics(diagnosticMessage);
}
@Override
public NodeId getNodeId() {
if (this.nodeId != null) {
return this.nodeId;
}
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasNodeId()) {
return null;
}
this.nodeId = convertFromProtoFormat(p.getNodeId());
return this.nodeId;
}
@Override
public void setNodeId(NodeId nodeId) {
maybeInitBuilder();
if (nodeId == null)
builder.clearNodeId();
this.nodeId = nodeId;
}
private NodeIdProto convertToProtoFormat(NodeId t) {
return ((NodeIdPBImpl) t).getProto();
}
private NodeIdPBImpl convertFromProtoFormat(NodeIdProto nodeId) {
return new NodeIdPBImpl(nodeId);
}
}

View File

@ -18,15 +18,24 @@
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdToLabelsProto;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.StringArrayProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportsForAppsProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProtoOrBuilder;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
@ -42,6 +51,8 @@ public class NodeHeartbeatRequestPBImpl extends NodeHeartbeatRequest {
private MasterKey lastKnownContainerTokenMasterKey = null;
private MasterKey lastKnownNMTokenMasterKey = null;
private Set<String> labels = null;
private Map<ApplicationId, LogAggregationReport>
logAggregationReportsForApps = null;
public NodeHeartbeatRequestPBImpl() {
builder = NodeHeartbeatRequestProto.newBuilder();
@ -91,6 +102,25 @@ public class NodeHeartbeatRequestPBImpl extends NodeHeartbeatRequest {
builder.setNodeLabels(StringArrayProto.newBuilder()
.addAllElements(this.labels).build());
}
if (this.logAggregationReportsForApps != null) {
addLogAggregationStatusForAppsToProto();
}
}
private void addLogAggregationStatusForAppsToProto() {
maybeInitBuilder();
builder.clearLogAggregationReportsForApps();
for (Entry<ApplicationId, LogAggregationReport> entry : logAggregationReportsForApps
.entrySet()) {
builder.addLogAggregationReportsForApps(LogAggregationReportsForAppsProto
.newBuilder().setAppId(convertToProtoFormat(entry.getKey()))
.setLogAggregationReport(convertToProtoFormat(entry.getValue())));
}
}
private LogAggregationReportProto convertToProtoFormat(
LogAggregationReport value) {
return ((LogAggregationReportPBImpl) value).getProto();
}
private void mergeLocalToProto() {
@ -215,4 +245,54 @@ public class NodeHeartbeatRequestPBImpl extends NodeHeartbeatRequest {
StringArrayProto nodeLabels = p.getNodeLabels();
labels = new HashSet<String>(nodeLabels.getElementsList());
}
private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) {
return new ApplicationIdPBImpl(p);
}
private ApplicationIdProto convertToProtoFormat(ApplicationId t) {
return ((ApplicationIdPBImpl) t).getProto();
}
@Override
public Map<ApplicationId, LogAggregationReport>
getLogAggregationReportsForApps() {
if (this.logAggregationReportsForApps != null) {
return this.logAggregationReportsForApps;
}
initLogAggregationReportsForApps();
return logAggregationReportsForApps;
}
private void initLogAggregationReportsForApps() {
NodeHeartbeatRequestProtoOrBuilder p = viaProto ? proto : builder;
List<LogAggregationReportsForAppsProto> list =
p.getLogAggregationReportsForAppsList();
this.logAggregationReportsForApps =
new HashMap<ApplicationId, LogAggregationReport>();
for (LogAggregationReportsForAppsProto c : list) {
ApplicationId appId = convertFromProtoFormat(c.getAppId());
LogAggregationReport report =
convertFromProtoFormat(c.getLogAggregationReport());
this.logAggregationReportsForApps.put(appId, report);
}
}
private LogAggregationReport convertFromProtoFormat(
LogAggregationReportProto logAggregationReport) {
return new LogAggregationReportPBImpl(logAggregationReport);
}
@Override
public void setLogAggregationReportsForApps(
Map<ApplicationId, LogAggregationReport> logAggregationStatusForApps) {
if (logAggregationStatusForApps == null
|| logAggregationStatusForApps.isEmpty()) {
return;
}
maybeInitBuilder();
this.logAggregationReportsForApps =
new HashMap<ApplicationId, LogAggregationReport>();
this.logAggregationReportsForApps.putAll(logAggregationStatusForApps);
}
}

View File

@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.api.records;
/**
* <p>Status of Log aggregation.</p>
*/
public enum LogAggregationStatus {
DISABLED,
NOT_START,
RUNNING,
FINISHED,
FAILED,
TIME_OUT
}

View File

@ -21,8 +21,10 @@ package org.apache.hadoop.yarn.server.webapp;
import static org.apache.hadoop.yarn.util.StringHelper.join;
import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ID;
import static org.apache.hadoop.yarn.webapp.YarnWebParams.WEB_UI_TYPE;
import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.server.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.webapp.ResponseInfo;
import org.apache.hadoop.yarn.webapp.YarnWebParams;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
@ -154,7 +157,7 @@ public class AppBlock extends HtmlBlock {
html.script().$type("text/javascript")._(script.toString())._();
}
info("Application Overview")
ResponseInfo overviewTable = info("Application Overview")
._("User:", app.getUser())
._("Name:", app.getName())
._("Application Type:", app.getType())
@ -181,8 +184,13 @@ public class AppBlock extends HtmlBlock {
.getAppState() == YarnApplicationState.FINISHED
|| app.getAppState() == YarnApplicationState.FAILED
|| app.getAppState() == YarnApplicationState.KILLED ? "History"
: "ApplicationMaster")
._("Diagnostics:",
: "ApplicationMaster");
if (webUiType != null
&& webUiType.equals(YarnWebParams.RM_WEB_UI)) {
overviewTable._("Log Aggregation Status",
root_url("logaggregationstatus", app.getAppId()), "Status");
}
overviewTable._("Diagnostics:",
app.getDiagnosticsInfo() == null ? "" : app.getDiagnosticsInfo());
Collection<ApplicationAttemptReport> attempts;

View File

@ -54,3 +54,11 @@ message VersionProto {
optional int32 minor_version = 2;
}
enum LogAggregationStatusProto {
LOG_DISABLED = 1;
LOG_NOT_START = 2;
LOG_RUNNING = 3;
LOG_FINISHED = 4;
LOG_TIME_OUT = 5;
}

View File

@ -50,6 +50,19 @@ message NodeHeartbeatRequestProto {
optional MasterKeyProto last_known_container_token_master_key = 2;
optional MasterKeyProto last_known_nm_token_master_key = 3;
optional StringArrayProto nodeLabels = 4;
repeated LogAggregationReportsForAppsProto log_aggregation_reports_for_apps = 5;
}
message LogAggregationReportsForAppsProto {
optional ApplicationIdProto appId = 1;
optional LogAggregationReportProto log_aggregation_report = 2;
}
message LogAggregationReportProto {
optional ApplicationIdProto application_id = 1;
optional NodeIdProto node_id = 2;
optional LogAggregationStatusProto log_aggregation_status = 3;
optional string diagnostics = 4 [default = "N/A"];
}
message NodeHeartbeatResponseProto {

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager;
import java.util.Map;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.security.Credentials;
@ -26,6 +27,7 @@ import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
@ -77,4 +79,7 @@ public interface Context {
boolean getDecommissioned();
void setDecommissioned(boolean isDecommissioned);
ConcurrentLinkedQueue<LogAggregationReport>
getLogAggregationStatusForApps();
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicBoolean;
@ -53,6 +54,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
@ -384,6 +386,8 @@ public class NodeManager extends CompositeService
.getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
private final NMStateStoreService stateStore;
private boolean isDecommissioned = false;
private final ConcurrentLinkedQueue<LogAggregationReport>
logAggregationReportForApps;
public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
NMTokenSecretManagerInNM nmTokenSecretManager,
@ -397,6 +401,8 @@ public class NodeManager extends CompositeService
this.nodeHealthStatus.setHealthReport("Healthy");
this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
this.stateStore = stateStore;
this.logAggregationReportForApps = new ConcurrentLinkedQueue<
LogAggregationReport>();
}
/**
@ -488,6 +494,12 @@ public class NodeManager extends CompositeService
Map<ApplicationId, Credentials> systemCredentials) {
this.systemCredentials = systemCredentials;
}
@Override
public ConcurrentLinkedQueue<LogAggregationReport>
getLogAggregationStatusForApps() {
return this.logAggregationReportForApps;
}
}

View File

@ -30,6 +30,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.Random;
import java.util.Set;
@ -58,6 +59,7 @@ import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.ServerRMProxy;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
@ -73,6 +75,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
import org.apache.hadoop.yarn.util.Records;
import org.apache.hadoop.yarn.util.YarnVersionInfo;
import com.google.common.annotations.VisibleForTesting;
@ -115,6 +118,10 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
// Duration for which to track recently stopped container.
private long durationToTrackStoppedContainers;
private boolean logAggregationEnabled;
private final List<LogAggregationReport> logAggregationReportForAppsTempList;
private final NodeHealthCheckerService healthChecker;
private final NodeManagerMetrics metrics;
@ -144,6 +151,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
this.recentlyStoppedContainers = new LinkedHashMap<ContainerId, Long>();
this.pendingCompletedContainers =
new HashMap<ContainerId, ContainerStatus>();
this.logAggregationReportForAppsTempList =
new ArrayList<LogAggregationReport>();
}
@Override
@ -193,6 +202,10 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
LOG.info("Initialized nodemanager for " + nodeId + ":" +
" physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb +
" virtual-cores=" + virtualCores);
this.logAggregationEnabled =
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED);
}
@Override
@ -649,6 +662,18 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
NodeStatusUpdaterImpl.this.context
.getNMTokenSecretManager().getCurrentKey(),
nodeLabelsForHeartbeat);
if (logAggregationEnabled) {
// pull log aggregation status for application running in this NM
Map<ApplicationId, LogAggregationReport> logAggregationReports =
getLogAggregationReportsForApps(context
.getLogAggregationStatusForApps());
if (logAggregationReports != null
&& !logAggregationReports.isEmpty()) {
request.setLogAggregationReportsForApps(logAggregationReports);
}
}
response = resourceTracker.nodeHeartbeat(request);
//get next heartbeat interval from response
nextHeartBeatInterval = response.getNextHeartBeatInterval();
@ -698,6 +723,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
removeOrTrackCompletedContainersFromContext(response
.getContainersToBeRemovedFromNM());
logAggregationReportForAppsTempList.clear();
lastHeartbeatID = response.getResponseId();
List<ContainerId> containersToCleanup = response
.getContainersToCleanup();
@ -783,5 +809,47 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
statusUpdater.start();
}
private Map<ApplicationId, LogAggregationReport>
getLogAggregationReportsForApps(
ConcurrentLinkedQueue<LogAggregationReport> lastestLogAggregationStatus) {
Map<ApplicationId, LogAggregationReport> latestLogAggregationReports =
new HashMap<ApplicationId, LogAggregationReport>();
LogAggregationReport status;
while ((status = lastestLogAggregationStatus.poll()) != null) {
this.logAggregationReportForAppsTempList.add(status);
}
for (LogAggregationReport logAggregationReport
: this.logAggregationReportForAppsTempList) {
LogAggregationReport report = null;
if (latestLogAggregationReports.containsKey(logAggregationReport
.getApplicationId())) {
report =
latestLogAggregationReports.get(logAggregationReport
.getApplicationId());
report.setLogAggregationStatus(logAggregationReport
.getLogAggregationStatus());
String message = report.getDiagnosticMessage();
if (logAggregationReport.getDiagnosticMessage() != null
&& !logAggregationReport.getDiagnosticMessage().isEmpty()) {
if (message != null) {
message += logAggregationReport.getDiagnosticMessage();
} else {
message = logAggregationReport.getDiagnosticMessage();
}
report.setDiagnosticMessage(message);
}
} else {
report = Records.newRecord(LogAggregationReport.class);
report.setApplicationId(logAggregationReport.getApplicationId());
report.setNodeId(this.nodeId);
report.setLogAggregationStatus(logAggregationReport
.getLogAggregationStatus());
report
.setDiagnosticMessage(logAggregationReport.getDiagnosticMessage());
}
latestLogAggregationReports.put(logAggregationReport.getApplicationId(),
report);
}
return latestLogAggregationReports;
}
}

View File

@ -57,12 +57,16 @@ import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogWriter;
import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy;
import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.hadoop.yarn.util.Times;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
@ -120,6 +124,8 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
// This variable is only for testing
private final AtomicBoolean waiting = new AtomicBoolean(false);
private boolean renameTemporaryLogFileFailed = false;
private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators =
new HashMap<ContainerId, ContainerLogAggregator>();
@ -292,12 +298,14 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
writer.close();
}
long currentTime = System.currentTimeMillis();
final Path renamedPath = this.rollingMonitorInterval <= 0
? remoteNodeLogFileForApp : new Path(
remoteNodeLogFileForApp.getParent(),
remoteNodeLogFileForApp.getName() + "_"
+ System.currentTimeMillis());
+ currentTime);
String diagnosticMessage = "";
final boolean rename = uploadedLogsInThisCycle;
try {
userUgi.doAs(new PrivilegedExceptionAction<Object>() {
@ -314,12 +322,36 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
return null;
}
});
diagnosticMessage =
"Log uploaded successfully for Application: " + appId
+ " in NodeManager: "
+ LogAggregationUtils.getNodeString(nodeId) + " at "
+ Times.format(currentTime) + "\n";
} catch (Exception e) {
LOG.error(
"Failed to move temporary log file to final location: ["
+ remoteNodeTmpLogFileForApp + "] to ["
+ renamedPath + "]", e);
diagnosticMessage =
"Log uploaded failed for Application: " + appId
+ " in NodeManager: "
+ LogAggregationUtils.getNodeString(nodeId) + " at "
+ Times.format(currentTime) + "\n";
renameTemporaryLogFileFailed = true;
}
LogAggregationReport report =
Records.newRecord(LogAggregationReport.class);
report.setApplicationId(appId);
report.setNodeId(nodeId);
report.setDiagnosticMessage(diagnosticMessage);
if (appFinished) {
report.setLogAggregationStatus(renameTemporaryLogFileFailed
? LogAggregationStatus.FAILED : LogAggregationStatus.FINISHED);
} else {
report.setLogAggregationStatus(LogAggregationStatus.RUNNING);
}
this.context.getLogAggregationStatusForApps().add(report);
} finally {
if (writer != null) {
writer.close();

View File

@ -458,10 +458,16 @@ public class ResourceTrackerService extends AbstractService implements
}
// 4. Send status to RMNode, saving the latest response.
this.rmContext.getDispatcher().getEventHandler().handle(
RMNodeStatusEvent nodeStatusEvent =
new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(),
remoteNodeStatus.getContainersStatuses(),
remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse));
remoteNodeStatus.getContainersStatuses(),
remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse);
if (request.getLogAggregationReportsForApps() != null
&& !request.getLogAggregationReportsForApps().isEmpty()) {
nodeStatusEvent.setLogAggregationReportsForApps(request
.getLogAggregationReportsForApps());
}
this.rmContext.getDispatcher().getEventHandler().handle(nodeStatusEvent);
// 5. Update node's labels to RM's NodeLabelManager.
if (isDistributesNodeLabelsConf && request.getNodeLabels() != null) {

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@ -242,4 +243,6 @@ public interface RMApp extends EventHandler<RMAppEvent> {
ReservationId getReservationId();
ResourceRequest getAMResourceRequest();
Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp();
}

View File

@ -25,9 +25,11 @@ import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@ -61,6 +63,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
@ -142,6 +146,12 @@ public class RMAppImpl implements RMApp, Recoverable {
new AppFinishedTransition();
private Set<NodeId> ranNodes = new ConcurrentSkipListSet<NodeId>();
private final boolean logAggregationEnabled;
private long logAggregationStartTime = 0;
private final long logAggregationStatusTimeout;
private final Map<NodeId, LogAggregationReport> logAggregationStatus =
new HashMap<NodeId, LogAggregationReport>();
// These states stored are only valid when app is at killing or final_saving.
private RMAppState stateBeforeKilling;
private RMAppState stateBeforeFinalSaving;
@ -413,6 +423,19 @@ public class RMAppImpl implements RMApp, Recoverable {
rmContext.getRMApplicationHistoryWriter().applicationStarted(this);
rmContext.getSystemMetricsPublisher().appCreated(this, startTime);
long localLogAggregationStatusTimeout =
conf.getLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS);
if (localLogAggregationStatusTimeout <= 0) {
this.logAggregationStatusTimeout =
YarnConfiguration.DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS;
} else {
this.logAggregationStatusTimeout = localLogAggregationStatusTimeout;
}
this.logAggregationEnabled =
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED);
}
@Override
@ -803,6 +826,12 @@ public class RMAppImpl implements RMApp, Recoverable {
// otherwise, add it to ranNodes for further process
app.ranNodes.add(nodeAddedEvent.getNodeId());
app.logAggregationStatus.put(nodeAddedEvent.getNodeId(),
LogAggregationReport.newInstance(app.applicationId, nodeAddedEvent
.getNodeId(), app.logAggregationEnabled
? LogAggregationStatus.NOT_START : LogAggregationStatus.DISABLED,
""));
};
}
@ -1153,6 +1182,7 @@ public class RMAppImpl implements RMApp, Recoverable {
}
public void transition(RMAppImpl app, RMAppEvent event) {
app.logAggregationStartTime = System.currentTimeMillis();
for (NodeId nodeId : app.getRanNodes()) {
app.handler.handle(
new RMNodeCleanAppEvent(nodeId, app.applicationId));
@ -1356,4 +1386,62 @@ public class RMAppImpl implements RMApp, Recoverable {
}
return credentials;
}
@Override
public Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp() {
try {
this.readLock.lock();
Map<NodeId, LogAggregationReport> outputs =
new HashMap<NodeId, LogAggregationReport>();
outputs.putAll(logAggregationStatus);
for (Entry<NodeId, LogAggregationReport> output : outputs.entrySet()) {
if (!output.getValue().getLogAggregationStatus()
.equals(LogAggregationStatus.TIME_OUT)
&& !output.getValue().getLogAggregationStatus()
.equals(LogAggregationStatus.FINISHED)
&& isAppInFinalState(this)
&& System.currentTimeMillis() > this.logAggregationStartTime
+ this.logAggregationStatusTimeout) {
output.getValue().setLogAggregationStatus(
LogAggregationStatus.TIME_OUT);
}
}
return outputs;
} finally {
this.readLock.unlock();
}
}
public void aggregateLogReport(NodeId nodeId, LogAggregationReport report) {
try {
this.writeLock.lock();
if (this.logAggregationEnabled) {
LogAggregationReport curReport = this.logAggregationStatus.get(nodeId);
if (curReport == null) {
this.logAggregationStatus.put(nodeId, report);
} else {
if (curReport.getLogAggregationStatus().equals(
LogAggregationStatus.TIME_OUT)) {
if (report.getLogAggregationStatus().equals(
LogAggregationStatus.FINISHED)) {
curReport.setLogAggregationStatus(report
.getLogAggregationStatus());
}
} else {
curReport.setLogAggregationStatus(report.getLogAggregationStatus());
}
if (report.getDiagnosticMessage() != null
&& !report.getDiagnosticMessage().isEmpty()) {
curReport
.setDiagnosticMessage(curReport.getDiagnosticMessage() == null
? report.getDiagnosticMessage() : curReport
.getDiagnosticMessage() + report.getDiagnosticMessage());
}
}
}
} finally {
this.writeLock.unlock();
}
}
}

View File

@ -22,6 +22,8 @@ import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentLinkedQueue;
@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
@ -56,6 +59,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
@ -773,6 +777,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
rmNode.handleContainerStatus(statusEvent.getContainers());
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps =
statusEvent.getLogAggregationReportsForApps();
if (logAggregationReportsForApps != null
&& !logAggregationReportsForApps.isEmpty()) {
rmNode.handleLogAggregationStatus(logAggregationReportsForApps);
}
if(rmNode.nextHeartBeat) {
rmNode.nextHeartBeat = false;
rmNode.context.getDispatcher().getEventHandler().handle(
@ -903,4 +914,15 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
}
}
private void handleLogAggregationStatus(
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps) {
for (Entry<ApplicationId, LogAggregationReport> report :
logAggregationReportsForApps.entrySet()) {
RMApp rmApp = this.context.getRMApps().get(report.getKey());
if (rmApp != null) {
((RMAppImpl)rmApp).aggregateLogReport(this.nodeId, report.getValue());
}
}
}
}

View File

@ -19,10 +19,12 @@
package org.apache.hadoop.yarn.server.resourcemanager.rmnode;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
@ -32,6 +34,7 @@ public class RMNodeStatusEvent extends RMNodeEvent {
private final List<ContainerStatus> containersCollection;
private final NodeHeartbeatResponse latestResponse;
private final List<ApplicationId> keepAliveAppIds;
private Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps;
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus,
List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds,
@ -41,6 +44,19 @@ public class RMNodeStatusEvent extends RMNodeEvent {
this.containersCollection = collection;
this.keepAliveAppIds = keepAliveAppIds;
this.latestResponse = latestResponse;
this.logAggregationReportsForApps = null;
}
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus,
List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds,
NodeHeartbeatResponse latestResponse,
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps) {
super(nodeId, RMNodeEventType.STATUS_UPDATE);
this.nodeHealthStatus = nodeHealthStatus;
this.containersCollection = collection;
this.keepAliveAppIds = keepAliveAppIds;
this.latestResponse = latestResponse;
this.logAggregationReportsForApps = logAggregationReportsForApps;
}
public NodeHealthStatus getNodeHealthStatus() {
@ -58,4 +74,14 @@ public class RMNodeStatusEvent extends RMNodeEvent {
public List<ApplicationId> getKeepAliveAppIds() {
return this.keepAliveAppIds;
}
public Map<ApplicationId, LogAggregationReport>
getLogAggregationReportsForApps() {
return this.logAggregationReportsForApps;
}
public void setLogAggregationReportsForApps(
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps) {
this.logAggregationReportsForApps = logAggregationReportsForApps;
}
}

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import static org.apache.hadoop.yarn.util.StringHelper.join;
import org.apache.hadoop.yarn.webapp.SubView;
import org.apache.hadoop.yarn.webapp.YarnWebParams;
public class AppLogAggregationStatusPage extends RmView{
@Override
protected void preHead(Page.HTML<_> html) {
commonPreHead(html);
String appId = $(YarnWebParams.APPLICATION_ID);
set(
TITLE,
appId.isEmpty() ? "Bad request: missing application ID" : join(
"Application ", $(YarnWebParams.APPLICATION_ID)));
}
@Override
protected Class<? extends SubView> content() {
return RMAppLogAggregationStatusBlock.class;
}
}

View File

@ -0,0 +1,148 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import static org.apache.hadoop.yarn.util.StringHelper.join;
import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ID;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
import com.google.inject.Inject;
public class RMAppLogAggregationStatusBlock extends HtmlBlock {
private static final Log LOG = LogFactory
.getLog(RMAppLogAggregationStatusBlock.class);
private final ResourceManager rm;
private final Configuration conf;
@Inject
RMAppLogAggregationStatusBlock(ViewContext ctx, ResourceManager rm,
Configuration conf) {
super(ctx);
this.rm = rm;
this.conf = conf;
}
@Override
protected void render(Block html) {
String aid = $(APPLICATION_ID);
if (aid.isEmpty()) {
puts("Bad request: requires Application ID");
return;
}
ApplicationId appId;
try {
appId = Apps.toAppID(aid);
} catch (Exception e) {
puts("Invalid Application ID: " + aid);
return;
}
setTitle(join("Application ", aid));
// Add LogAggregationStatus description table
// to explain the meaning of different LogAggregationStatus
DIV<Hamlet> div_description = html.div(_INFO_WRAP);
TABLE<DIV<Hamlet>> table_description =
div_description.table("#LogAggregationStatusDecription");
table_description.
tr().
th(_TH, "Log Aggregation Status").
th(_TH, "Description").
_();
table_description.tr().td(LogAggregationStatus.DISABLED.name())
.td("Log Aggregation is Disabled.")._();
table_description.tr().td(LogAggregationStatus.NOT_START.name())
.td("Log Aggregation does not Start.")._();
table_description.tr().td(LogAggregationStatus.RUNNING.name())
.td("Log Aggregation is Running.")._();
table_description.tr().td(LogAggregationStatus.FINISHED.name())
.td("Log Aggregation is Finished. All of the logs have been "
+ "aggregated successfully.")._();
table_description.tr().td(LogAggregationStatus.FAILED.name())
.td("Log Aggregation is Failed. At least one of the logs "
+ "have not been aggregated.")._();
table_description.tr().td(LogAggregationStatus.TIME_OUT.name())
.td("Does not get the Log aggregation status for a long time. "
+ "Not sure what is the current Log Aggregation Status.")._();
table_description._();
div_description._();
boolean logAggregationEnabled =
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED);
// Application Log aggregation status Table
DIV<Hamlet> div = html.div(_INFO_WRAP);
TABLE<DIV<Hamlet>> table =
div.h3(
"Log Aggregation: "
+ (logAggregationEnabled ? "Enabled" : "Disabled")).table(
"#LogAggregationStatus");
table.
tr().
th(_TH, "NodeId").
th(_TH, "Log Aggregation Status").
th(_TH, "Diagnostis Message").
_();
RMApp rmApp = rm.getRMContext().getRMApps().get(appId);
if (rmApp != null) {
Map<NodeId, LogAggregationReport> logAggregationReports =
rmApp.getLogAggregationReportsForApp();
if (logAggregationReports != null && !logAggregationReports.isEmpty()) {
for (Entry<NodeId, LogAggregationReport> report :
logAggregationReports.entrySet()) {
LogAggregationStatus status =
report.getValue() == null ? null : report.getValue()
.getLogAggregationStatus();
String message =
report.getValue() == null ? null : report.getValue()
.getDiagnosticMessage();
table.tr()
.td(report.getKey().toString())
.td(status == null ? "N/A" : status.toString())
.td(message == null ? "N/A" : message)._();
}
}
}
table._();
div._();
}
}

View File

@ -68,6 +68,8 @@ public class RMWebApp extends WebApp implements YarnWebParams {
"appattempt");
route(pajoin("/container", CONTAINER_ID), RmController.class, "container");
route("/errors-and-warnings", RmController.class, "errorsAndWarnings");
route(pajoin("/logaggregationstatus", APPLICATION_ID),
RmController.class, "logaggregationstatus");
}
@Override

View File

@ -109,4 +109,8 @@ public class RmController extends Controller {
public void errorsAndWarnings() {
render(RMErrorsAndWarningsPage.class);
}
public void logaggregationstatus() {
render(AppLogAggregationStatusPage.class);
}
}

View File

@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics;
@ -190,6 +191,11 @@ public abstract class MockAsm extends MockApps {
public ResourceRequest getAMResourceRequest() {
return this.amReq;
}
@Override
public Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp() {
throw new UnsupportedOperationException("Not supported yet.");
}
}
public static RMApp newApplication(int i) {

View File

@ -0,0 +1,318 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.logaggregationstatus;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.event.InlineDispatcher;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
public class TestRMAppLogAggregationStatus {
private RMContext rmContext;
private YarnScheduler scheduler;
private SchedulerEventType eventType;
private ApplicationId appId;
private final class TestSchedulerEventDispatcher implements
EventHandler<SchedulerEvent> {
@Override
public void handle(SchedulerEvent event) {
scheduler.handle(event);
}
}
@Before
public void setUp() throws Exception {
InlineDispatcher rmDispatcher = new InlineDispatcher();
rmContext =
new RMContextImpl(rmDispatcher, null, null, null,
null, null, null, null, null,
new RMApplicationHistoryWriter());
rmContext.setSystemMetricsPublisher(new SystemMetricsPublisher());
scheduler = mock(YarnScheduler.class);
doAnswer(
new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
final SchedulerEvent event = (SchedulerEvent)(invocation.getArguments()[0]);
eventType = event.getType();
if (eventType == SchedulerEventType.NODE_UPDATE) {
//DO NOTHING
}
return null;
}
}
).when(scheduler).handle(any(SchedulerEvent.class));
rmDispatcher.register(SchedulerEventType.class,
new TestSchedulerEventDispatcher());
appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
}
@After
public void tearDown() throws Exception {
}
@Test
public void testLogAggregationStatus() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
RMApp rmApp = createRMApp(conf);
this.rmContext.getRMApps().put(appId, rmApp);
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
// This application will be running on two nodes
NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
Resource capability = Resource.newInstance(4096, 4);
RMNodeImpl node1 =
new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
RMNodeImpl node2 =
new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
// The initial log aggregation status for these two nodes
// should be NOT_STARTED
Map<NodeId, LogAggregationReport> logAggregationStatus =
rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
.entrySet()) {
Assert.assertEquals(LogAggregationStatus.NOT_START, report.getValue()
.getLogAggregationStatus());
}
Map<ApplicationId, LogAggregationReport> node1ReportForApp =
new HashMap<ApplicationId, LogAggregationReport>();
String messageForNode1_1 =
"node1 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report1 =
LogAggregationReport.newInstance(appId, nodeId1,
LogAggregationStatus.RUNNING, messageForNode1_1);
node1ReportForApp.put(appId, report1);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
null, node1ReportForApp));
Map<ApplicationId, LogAggregationReport> node2ReportForApp =
new HashMap<ApplicationId, LogAggregationReport>();
String messageForNode2_1 =
"node2 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report2 =
LogAggregationReport.newInstance(appId, nodeId2,
LogAggregationStatus.RUNNING, messageForNode2_1);
node2ReportForApp.put(appId, report2);
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), NodeHealthStatus
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
null, node2ReportForApp));
// node1 and node2 has updated its log aggregation status
// verify that the log aggregation status for node1, node2
// has been changed
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
.getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1, report.getValue()
.getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
.getLogAggregationStatus());
Assert.assertEquals(messageForNode2_1, report.getValue()
.getDiagnosticMessage());
} else {
// should not contain log aggregation report for other nodes
Assert
.fail("should not contain log aggregation report for other nodes");
}
}
// node1 updates its log aggregation status again
Map<ApplicationId, LogAggregationReport> node1ReportForApp2 =
new HashMap<ApplicationId, LogAggregationReport>();
String messageForNode1_2 =
"node1 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report1_2 =
LogAggregationReport.newInstance(appId, nodeId1,
LogAggregationStatus.RUNNING, messageForNode1_2);
node1ReportForApp2.put(appId, report1_2);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
null, node1ReportForApp2));
// verify that the log aggregation status for node1
// has been changed
// verify that the log aggregation status for node2
// does not change
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
.getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1 + messageForNode1_2, report
.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
.getLogAggregationStatus());
Assert.assertEquals(messageForNode2_1, report.getValue()
.getDiagnosticMessage());
} else {
// should not contain log aggregation report for other nodes
Assert
.fail("should not contain log aggregation report for other nodes");
}
}
// kill the application
rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
// wait for 1500 ms
Thread.sleep(1500);
// the log aggregation status for both nodes should be changed
// to TIME_OUT
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
.entrySet()) {
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue()
.getLogAggregationStatus());
}
// Finally, node1 finished its log aggregation and sent out its final
// log aggregation status. The log aggregation status for node1 should
// be changed from TIME_OUT to Finished
Map<ApplicationId, LogAggregationReport> node1ReportForApp3 =
new HashMap<ApplicationId, LogAggregationReport>();
String messageForNode1_3 =
"node1 final logAggregation status updated at "
+ System.currentTimeMillis();
LogAggregationReport report1_3 =
LogAggregationReport.newInstance(appId, nodeId1,
LogAggregationStatus.FINISHED, messageForNode1_3);
node1ReportForApp3.put(appId, report1_3);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
null, node1ReportForApp3));
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.FINISHED, report.getValue()
.getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1 + messageForNode1_2
+ messageForNode1_3, report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue()
.getLogAggregationStatus());
} else {
// should not contain log aggregation report for other nodes
Assert
.fail("should not contain log aggregation report for other nodes");
}
}
}
private RMApp createRMApp(Configuration conf) {
ApplicationSubmissionContext submissionContext =
ApplicationSubmissionContext.newInstance(appId, "test", "default",
Priority.newInstance(0), null, false, true,
2, Resource.newInstance(10, 2), "test");
return new RMAppImpl(this.appId, this.rmContext,
conf, "test", "test", "default", submissionContext,
this.rmContext.getScheduler(),
this.rmContext.getApplicationMasterService(),
System.currentTimeMillis(), "test",
null, null);
}
}

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@ -271,4 +272,9 @@ public class MockRMApp implements RMApp {
public ResourceRequest getAMResourceRequest() {
return this.amReq;
}
@Override
public Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp() {
throw new UnsupportedOperationException("Not supported yet.");
}
}