YARN-1376. NM need to notify the log aggregation status to RM through Node heartbeat. Contributed by Xuan Gong.
This commit is contained in:
parent
83979e61a4
commit
92431c9617
|
@ -60,6 +60,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
container-executor for outbound network traffic control. (Sidharta Seethana
|
container-executor for outbound network traffic control. (Sidharta Seethana
|
||||||
via vinodkv)
|
via vinodkv)
|
||||||
|
|
||||||
|
YARN-1376. NM need to notify the log aggregation status to RM through
|
||||||
|
heartbeat. (Xuan Gong via junping_du)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-1880. Cleanup TestApplicationClientProtocolOnHA
|
YARN-1880. Cleanup TestApplicationClientProtocolOnHA
|
||||||
|
|
|
@ -741,6 +741,17 @@ public class YarnConfiguration extends Configuration {
|
||||||
YARN_PREFIX + "log-aggregation.retain-check-interval-seconds";
|
YARN_PREFIX + "log-aggregation.retain-check-interval-seconds";
|
||||||
public static final long DEFAULT_LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS = -1;
|
public static final long DEFAULT_LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How long the ResourceManager waits for a NodeManager to report its
|
||||||
|
* log aggregation status. If the NodeManager has not reported its log
|
||||||
|
* aggregation status within the configured time, the RM reports the
|
||||||
|
* log aggregation status for this NodeManager as TIME_OUT.
|
||||||
|
*/
|
||||||
|
public static final String LOG_AGGREGATION_STATUS_TIME_OUT_MS =
|
||||||
|
YARN_PREFIX + "log-aggregation-status.time-out.ms";
|
||||||
|
public static final long DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS
|
||||||
|
= 10 * 60 * 1000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of seconds to retain logs on the NodeManager. Only applicable if Log
|
* Number of seconds to retain logs on the NodeManager. Only applicable if Log
|
||||||
* aggregation is disabled
|
* aggregation is disabled
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.api.protocolrecords;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
|
||||||
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@code LogAggregationReport} is a report for log aggregation status
|
||||||
|
* in one NodeManager of an application.
|
||||||
|
* <p>
|
||||||
|
* It includes details such as:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link ApplicationId} of the application.</li>
|
||||||
|
* <li>{@link NodeId} of the NodeManager.</li>
|
||||||
|
* <li>{@link LogAggregationStatus}</li>
|
||||||
|
* <li>Diagnostic information</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract class LogAggregationReport {
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static LogAggregationReport newInstance(ApplicationId appId,
|
||||||
|
NodeId nodeId, LogAggregationStatus status, String diagnosticMessage) {
|
||||||
|
LogAggregationReport report = Records.newRecord(LogAggregationReport.class);
|
||||||
|
report.setApplicationId(appId);
|
||||||
|
report.setLogAggregationStatus(status);
|
||||||
|
report.setDiagnosticMessage(diagnosticMessage);
|
||||||
|
return report;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the <code>ApplicationId</code> of the application.
|
||||||
|
* @return <code>ApplicationId</code> of the application
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract ApplicationId getApplicationId();
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract void setApplicationId(ApplicationId appId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the <code>NodeId</code>.
|
||||||
|
* @return <code>NodeId</code>
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract NodeId getNodeId();
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract void setNodeId(NodeId nodeId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the <code>LogAggregationStatus</code>.
|
||||||
|
* @return <code>LogAggregationStatus</code>
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract LogAggregationStatus getLogAggregationStatus();
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract void setLogAggregationStatus(
|
||||||
|
LogAggregationStatus logAggregationStatus);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the <em>diagnositic information</em> of this log aggregation
|
||||||
|
* @return <em>diagnositic information</em> of this log aggregation
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract String getDiagnosticMessage();
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public abstract void setDiagnosticMessage(String diagnosticMessage);
|
||||||
|
}
|
|
@ -18,8 +18,10 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.api.protocolrecords;
|
package org.apache.hadoop.yarn.server.api.protocolrecords;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.server.api.records.MasterKey;
|
import org.apache.hadoop.yarn.server.api.records.MasterKey;
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
||||||
import org.apache.hadoop.yarn.util.Records;
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
@ -51,4 +53,10 @@ public abstract class NodeHeartbeatRequest {
|
||||||
|
|
||||||
public abstract Set<String> getNodeLabels();
|
public abstract Set<String> getNodeLabels();
|
||||||
public abstract void setNodeLabels(Set<String> nodeLabels);
|
public abstract void setNodeLabels(Set<String> nodeLabels);
|
||||||
|
|
||||||
|
public abstract Map<ApplicationId, LogAggregationReport>
|
||||||
|
getLogAggregationReportsForApps();
|
||||||
|
|
||||||
|
public abstract void setLogAggregationReportsForApps(
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,227 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
|
||||||
|
import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.LogAggregationStatusProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportProtoOrBuilder;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
|
||||||
|
|
||||||
|
import com.google.protobuf.TextFormat;
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public class LogAggregationReportPBImpl extends LogAggregationReport {
|
||||||
|
|
||||||
|
LogAggregationReportProto proto = LogAggregationReportProto
|
||||||
|
.getDefaultInstance();
|
||||||
|
LogAggregationReportProto.Builder builder = null;
|
||||||
|
boolean viaProto = false;
|
||||||
|
|
||||||
|
private static final String LOGAGGREGATION_STATUS_PREFIX = "LOG_";
|
||||||
|
|
||||||
|
private ApplicationId applicationId;
|
||||||
|
private NodeId nodeId;
|
||||||
|
|
||||||
|
public LogAggregationReportPBImpl() {
|
||||||
|
builder = LogAggregationReportProto.newBuilder();
|
||||||
|
}
|
||||||
|
|
||||||
|
public LogAggregationReportPBImpl(LogAggregationReportProto proto) {
|
||||||
|
this.proto = proto;
|
||||||
|
viaProto = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LogAggregationReportProto getProto() {
|
||||||
|
mergeLocalToProto();
|
||||||
|
proto = viaProto ? proto : builder.build();
|
||||||
|
viaProto = true;
|
||||||
|
return proto;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return getProto().hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (other == null)
|
||||||
|
return false;
|
||||||
|
if (other.getClass().isAssignableFrom(this.getClass())) {
|
||||||
|
return this.getProto().equals(this.getClass().cast(other).getProto());
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return TextFormat.shortDebugString(getProto());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mergeLocalToBuilder() {
|
||||||
|
if (this.applicationId != null
|
||||||
|
&& !((ApplicationIdPBImpl) this.applicationId).getProto().equals(
|
||||||
|
builder.getApplicationId())) {
|
||||||
|
builder.setApplicationId(convertToProtoFormat(this.applicationId));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.nodeId != null
|
||||||
|
&& !((NodeIdPBImpl) this.nodeId).getProto().equals(
|
||||||
|
builder.getNodeId())) {
|
||||||
|
builder.setNodeId(convertToProtoFormat(this.nodeId));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mergeLocalToProto() {
|
||||||
|
if (viaProto)
|
||||||
|
maybeInitBuilder();
|
||||||
|
mergeLocalToBuilder();
|
||||||
|
proto = builder.build();
|
||||||
|
viaProto = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void maybeInitBuilder() {
|
||||||
|
if (viaProto || builder == null) {
|
||||||
|
builder = LogAggregationReportProto.newBuilder(proto);
|
||||||
|
}
|
||||||
|
viaProto = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ApplicationId getApplicationId() {
|
||||||
|
if (this.applicationId != null) {
|
||||||
|
return this.applicationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasApplicationId()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
this.applicationId = convertFromProtoFormat(p.getApplicationId());
|
||||||
|
return this.applicationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setApplicationId(ApplicationId appId) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (appId == null)
|
||||||
|
builder.clearApplicationId();
|
||||||
|
this.applicationId = appId;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationIdProto convertToProtoFormat(ApplicationId t) {
|
||||||
|
return ((ApplicationIdPBImpl) t).getProto();
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationIdPBImpl convertFromProtoFormat(
|
||||||
|
ApplicationIdProto applicationId) {
|
||||||
|
return new ApplicationIdPBImpl(applicationId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LogAggregationStatus getLogAggregationStatus() {
|
||||||
|
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasLogAggregationStatus()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return convertFromProtoFormat(p.getLogAggregationStatus());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void
|
||||||
|
setLogAggregationStatus(LogAggregationStatus logAggregationStatus) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (logAggregationStatus == null) {
|
||||||
|
builder.clearLogAggregationStatus();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setLogAggregationStatus(convertToProtoFormat(logAggregationStatus));
|
||||||
|
}
|
||||||
|
|
||||||
|
private LogAggregationStatus convertFromProtoFormat(
|
||||||
|
LogAggregationStatusProto s) {
|
||||||
|
return LogAggregationStatus.valueOf(s.name().replace(
|
||||||
|
LOGAGGREGATION_STATUS_PREFIX, ""));
|
||||||
|
}
|
||||||
|
|
||||||
|
private LogAggregationStatusProto
|
||||||
|
convertToProtoFormat(LogAggregationStatus s) {
|
||||||
|
return LogAggregationStatusProto.valueOf(LOGAGGREGATION_STATUS_PREFIX
|
||||||
|
+ s.name());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDiagnosticMessage() {
|
||||||
|
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasDiagnostics()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return p.getDiagnostics();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDiagnosticMessage(String diagnosticMessage) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (diagnosticMessage == null) {
|
||||||
|
builder.clearDiagnostics();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setDiagnostics(diagnosticMessage);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodeId getNodeId() {
|
||||||
|
if (this.nodeId != null) {
|
||||||
|
return this.nodeId;
|
||||||
|
}
|
||||||
|
|
||||||
|
LogAggregationReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasNodeId()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
this.nodeId = convertFromProtoFormat(p.getNodeId());
|
||||||
|
return this.nodeId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNodeId(NodeId nodeId) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (nodeId == null)
|
||||||
|
builder.clearNodeId();
|
||||||
|
this.nodeId = nodeId;
|
||||||
|
}
|
||||||
|
|
||||||
|
private NodeIdProto convertToProtoFormat(NodeId t) {
|
||||||
|
return ((NodeIdPBImpl) t).getProto();
|
||||||
|
}
|
||||||
|
|
||||||
|
private NodeIdPBImpl convertFromProtoFormat(NodeIdProto nodeId) {
|
||||||
|
return new NodeIdPBImpl(nodeId);
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,15 +18,24 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
|
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdToLabelsProto;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.StringArrayProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.StringArrayProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
|
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
|
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.LogAggregationReportsForAppsProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProto;
|
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProtoOrBuilder;
|
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProtoOrBuilder;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
||||||
import org.apache.hadoop.yarn.server.api.records.MasterKey;
|
import org.apache.hadoop.yarn.server.api.records.MasterKey;
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
||||||
|
@ -42,6 +51,8 @@ public class NodeHeartbeatRequestPBImpl extends NodeHeartbeatRequest {
|
||||||
private MasterKey lastKnownContainerTokenMasterKey = null;
|
private MasterKey lastKnownContainerTokenMasterKey = null;
|
||||||
private MasterKey lastKnownNMTokenMasterKey = null;
|
private MasterKey lastKnownNMTokenMasterKey = null;
|
||||||
private Set<String> labels = null;
|
private Set<String> labels = null;
|
||||||
|
private Map<ApplicationId, LogAggregationReport>
|
||||||
|
logAggregationReportsForApps = null;
|
||||||
|
|
||||||
public NodeHeartbeatRequestPBImpl() {
|
public NodeHeartbeatRequestPBImpl() {
|
||||||
builder = NodeHeartbeatRequestProto.newBuilder();
|
builder = NodeHeartbeatRequestProto.newBuilder();
|
||||||
|
@ -91,6 +102,25 @@ public class NodeHeartbeatRequestPBImpl extends NodeHeartbeatRequest {
|
||||||
builder.setNodeLabels(StringArrayProto.newBuilder()
|
builder.setNodeLabels(StringArrayProto.newBuilder()
|
||||||
.addAllElements(this.labels).build());
|
.addAllElements(this.labels).build());
|
||||||
}
|
}
|
||||||
|
if (this.logAggregationReportsForApps != null) {
|
||||||
|
addLogAggregationStatusForAppsToProto();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addLogAggregationStatusForAppsToProto() {
|
||||||
|
maybeInitBuilder();
|
||||||
|
builder.clearLogAggregationReportsForApps();
|
||||||
|
for (Entry<ApplicationId, LogAggregationReport> entry : logAggregationReportsForApps
|
||||||
|
.entrySet()) {
|
||||||
|
builder.addLogAggregationReportsForApps(LogAggregationReportsForAppsProto
|
||||||
|
.newBuilder().setAppId(convertToProtoFormat(entry.getKey()))
|
||||||
|
.setLogAggregationReport(convertToProtoFormat(entry.getValue())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private LogAggregationReportProto convertToProtoFormat(
|
||||||
|
LogAggregationReport value) {
|
||||||
|
return ((LogAggregationReportPBImpl) value).getProto();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mergeLocalToProto() {
|
private void mergeLocalToProto() {
|
||||||
|
@ -215,4 +245,54 @@ public class NodeHeartbeatRequestPBImpl extends NodeHeartbeatRequest {
|
||||||
StringArrayProto nodeLabels = p.getNodeLabels();
|
StringArrayProto nodeLabels = p.getNodeLabels();
|
||||||
labels = new HashSet<String>(nodeLabels.getElementsList());
|
labels = new HashSet<String>(nodeLabels.getElementsList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) {
|
||||||
|
return new ApplicationIdPBImpl(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationIdProto convertToProtoFormat(ApplicationId t) {
|
||||||
|
return ((ApplicationIdPBImpl) t).getProto();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<ApplicationId, LogAggregationReport>
|
||||||
|
getLogAggregationReportsForApps() {
|
||||||
|
if (this.logAggregationReportsForApps != null) {
|
||||||
|
return this.logAggregationReportsForApps;
|
||||||
|
}
|
||||||
|
initLogAggregationReportsForApps();
|
||||||
|
return logAggregationReportsForApps;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initLogAggregationReportsForApps() {
|
||||||
|
NodeHeartbeatRequestProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
List<LogAggregationReportsForAppsProto> list =
|
||||||
|
p.getLogAggregationReportsForAppsList();
|
||||||
|
this.logAggregationReportsForApps =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
for (LogAggregationReportsForAppsProto c : list) {
|
||||||
|
ApplicationId appId = convertFromProtoFormat(c.getAppId());
|
||||||
|
LogAggregationReport report =
|
||||||
|
convertFromProtoFormat(c.getLogAggregationReport());
|
||||||
|
this.logAggregationReportsForApps.put(appId, report);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private LogAggregationReport convertFromProtoFormat(
|
||||||
|
LogAggregationReportProto logAggregationReport) {
|
||||||
|
return new LogAggregationReportPBImpl(logAggregationReport);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setLogAggregationReportsForApps(
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationStatusForApps) {
|
||||||
|
if (logAggregationStatusForApps == null
|
||||||
|
|| logAggregationStatusForApps.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
maybeInitBuilder();
|
||||||
|
this.logAggregationReportsForApps =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
this.logAggregationReportsForApps.putAll(logAggregationStatusForApps);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.api.records;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Status of Log aggregation.</p>
|
||||||
|
*/
|
||||||
|
public enum LogAggregationStatus {
  // NOTE(review): the per-constant descriptions are inferred from the
  // constant names and should be confirmed against the NM/RM code that
  // produces them. Constant order is kept as declared.

  /** Log aggregation is disabled. */
  DISABLED,

  /** Log aggregation has not started yet. */
  NOT_START,

  /** Log aggregation is in progress. */
  RUNNING,

  /** Log aggregation has finished. */
  FINISHED,

  /** Log aggregation failed. */
  FAILED,

  /** No log aggregation status was reported within the allowed time. */
  TIME_OUT
}
|
|
@ -21,8 +21,10 @@ package org.apache.hadoop.yarn.server.webapp;
|
||||||
import static org.apache.hadoop.yarn.util.StringHelper.join;
|
import static org.apache.hadoop.yarn.util.StringHelper.join;
|
||||||
import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ID;
|
import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ID;
|
||||||
import static org.apache.hadoop.yarn.webapp.YarnWebParams.WEB_UI_TYPE;
|
import static org.apache.hadoop.yarn.webapp.YarnWebParams.WEB_UI_TYPE;
|
||||||
|
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.security.PrivilegedExceptionAction;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringEscapeUtils;
|
import org.apache.commons.lang.StringEscapeUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.server.webapp.dao.AppInfo;
|
||||||
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
|
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
|
||||||
import org.apache.hadoop.yarn.util.Apps;
|
import org.apache.hadoop.yarn.util.Apps;
|
||||||
import org.apache.hadoop.yarn.util.Times;
|
import org.apache.hadoop.yarn.util.Times;
|
||||||
|
import org.apache.hadoop.yarn.webapp.ResponseInfo;
|
||||||
import org.apache.hadoop.yarn.webapp.YarnWebParams;
|
import org.apache.hadoop.yarn.webapp.YarnWebParams;
|
||||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
|
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
|
||||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
|
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
|
||||||
|
@ -154,7 +157,7 @@ public class AppBlock extends HtmlBlock {
|
||||||
html.script().$type("text/javascript")._(script.toString())._();
|
html.script().$type("text/javascript")._(script.toString())._();
|
||||||
}
|
}
|
||||||
|
|
||||||
info("Application Overview")
|
ResponseInfo overviewTable = info("Application Overview")
|
||||||
._("User:", app.getUser())
|
._("User:", app.getUser())
|
||||||
._("Name:", app.getName())
|
._("Name:", app.getName())
|
||||||
._("Application Type:", app.getType())
|
._("Application Type:", app.getType())
|
||||||
|
@ -181,8 +184,13 @@ public class AppBlock extends HtmlBlock {
|
||||||
.getAppState() == YarnApplicationState.FINISHED
|
.getAppState() == YarnApplicationState.FINISHED
|
||||||
|| app.getAppState() == YarnApplicationState.FAILED
|
|| app.getAppState() == YarnApplicationState.FAILED
|
||||||
|| app.getAppState() == YarnApplicationState.KILLED ? "History"
|
|| app.getAppState() == YarnApplicationState.KILLED ? "History"
|
||||||
: "ApplicationMaster")
|
: "ApplicationMaster");
|
||||||
._("Diagnostics:",
|
if (webUiType != null
|
||||||
|
&& webUiType.equals(YarnWebParams.RM_WEB_UI)) {
|
||||||
|
overviewTable._("Log Aggregation Status",
|
||||||
|
root_url("logaggregationstatus", app.getAppId()), "Status");
|
||||||
|
}
|
||||||
|
overviewTable._("Diagnostics:",
|
||||||
app.getDiagnosticsInfo() == null ? "" : app.getDiagnosticsInfo());
|
app.getDiagnosticsInfo() == null ? "" : app.getDiagnosticsInfo());
|
||||||
|
|
||||||
Collection<ApplicationAttemptReport> attempts;
|
Collection<ApplicationAttemptReport> attempts;
|
||||||
|
|
|
@ -54,3 +54,11 @@ message VersionProto {
|
||||||
optional int32 minor_version = 2;
|
optional int32 minor_version = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wire representation of the Java LogAggregationStatus enum. The Java side
// converts by name ("LOG_" + constant name), so every Java constant needs a
// LOG_-prefixed value here.
enum LogAggregationStatusProto {
  LOG_DISABLED = 1;
  LOG_NOT_START = 2;
  LOG_RUNNING = 3;
  LOG_FINISHED = 4;
  LOG_TIME_OUT = 5;
  // Counterpart of LogAggregationStatus.FAILED, which previously had no
  // proto value. Numbered after the existing values so their numbers on the
  // wire stay unchanged.
  LOG_FAILED = 6;
}
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,19 @@ message NodeHeartbeatRequestProto {
|
||||||
optional MasterKeyProto last_known_container_token_master_key = 2;
|
optional MasterKeyProto last_known_container_token_master_key = 2;
|
||||||
optional MasterKeyProto last_known_nm_token_master_key = 3;
|
optional MasterKeyProto last_known_nm_token_master_key = 3;
|
||||||
optional StringArrayProto nodeLabels = 4;
|
optional StringArrayProto nodeLabels = 4;
|
||||||
|
repeated LogAggregationReportsForAppsProto log_aggregation_reports_for_apps = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LogAggregationReportsForAppsProto {
|
||||||
|
optional ApplicationIdProto appId = 1;
|
||||||
|
optional LogAggregationReportProto log_aggregation_report = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LogAggregationReportProto {
|
||||||
|
optional ApplicationIdProto application_id = 1;
|
||||||
|
optional NodeIdProto node_id = 2;
|
||||||
|
optional LogAggregationStatusProto log_aggregation_status = 3;
|
||||||
|
optional string diagnostics = 4 [default = "N/A"];
|
||||||
}
|
}
|
||||||
|
|
||||||
message NodeHeartbeatResponseProto {
|
message NodeHeartbeatResponseProto {
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.yarn.server.nodemanager;
|
package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
|
||||||
import org.apache.hadoop.security.Credentials;
|
import org.apache.hadoop.security.Credentials;
|
||||||
|
@ -26,6 +27,7 @@ import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
|
@ -77,4 +79,7 @@ public interface Context {
|
||||||
boolean getDecommissioned();
|
boolean getDecommissioned();
|
||||||
|
|
||||||
void setDecommissioned(boolean isDecommissioned);
|
void setDecommissioned(boolean isDecommissioned);
|
||||||
|
|
||||||
|
ConcurrentLinkedQueue<LogAggregationReport>
|
||||||
|
getLogAggregationStatusForApps();
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.ConcurrentSkipListMap;
|
import java.util.concurrent.ConcurrentSkipListMap;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
@ -53,6 +54,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
||||||
|
@ -384,6 +386,8 @@ public class NodeManager extends CompositeService
|
||||||
.getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
|
.getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
|
||||||
private final NMStateStoreService stateStore;
|
private final NMStateStoreService stateStore;
|
||||||
private boolean isDecommissioned = false;
|
private boolean isDecommissioned = false;
|
||||||
|
private final ConcurrentLinkedQueue<LogAggregationReport>
|
||||||
|
logAggregationReportForApps;
|
||||||
|
|
||||||
public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
|
public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
|
||||||
NMTokenSecretManagerInNM nmTokenSecretManager,
|
NMTokenSecretManagerInNM nmTokenSecretManager,
|
||||||
|
@ -397,6 +401,8 @@ public class NodeManager extends CompositeService
|
||||||
this.nodeHealthStatus.setHealthReport("Healthy");
|
this.nodeHealthStatus.setHealthReport("Healthy");
|
||||||
this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
|
this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
|
||||||
this.stateStore = stateStore;
|
this.stateStore = stateStore;
|
||||||
|
this.logAggregationReportForApps = new ConcurrentLinkedQueue<
|
||||||
|
LogAggregationReport>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -488,6 +494,12 @@ public class NodeManager extends CompositeService
|
||||||
Map<ApplicationId, Credentials> systemCredentials) {
|
Map<ApplicationId, Credentials> systemCredentials) {
|
||||||
this.systemCredentials = systemCredentials;
|
this.systemCredentials = systemCredentials;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConcurrentLinkedQueue<LogAggregationReport>
|
||||||
|
getLogAggregationStatusForApps() {
|
||||||
|
return this.logAggregationReportForApps;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -58,6 +59,7 @@ import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
||||||
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
|
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
|
||||||
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
||||||
import org.apache.hadoop.yarn.server.api.ServerRMProxy;
|
import org.apache.hadoop.yarn.server.api.ServerRMProxy;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
|
@ -73,6 +75,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
|
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
|
||||||
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -115,6 +118,10 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
// Duration for which to track recently stopped container.
|
// Duration for which to track recently stopped container.
|
||||||
private long durationToTrackStoppedContainers;
|
private long durationToTrackStoppedContainers;
|
||||||
|
|
||||||
|
private boolean logAggregationEnabled;
|
||||||
|
|
||||||
|
private final List<LogAggregationReport> logAggregationReportForAppsTempList;
|
||||||
|
|
||||||
private final NodeHealthCheckerService healthChecker;
|
private final NodeHealthCheckerService healthChecker;
|
||||||
private final NodeManagerMetrics metrics;
|
private final NodeManagerMetrics metrics;
|
||||||
|
|
||||||
|
@ -144,6 +151,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
this.recentlyStoppedContainers = new LinkedHashMap<ContainerId, Long>();
|
this.recentlyStoppedContainers = new LinkedHashMap<ContainerId, Long>();
|
||||||
this.pendingCompletedContainers =
|
this.pendingCompletedContainers =
|
||||||
new HashMap<ContainerId, ContainerStatus>();
|
new HashMap<ContainerId, ContainerStatus>();
|
||||||
|
this.logAggregationReportForAppsTempList =
|
||||||
|
new ArrayList<LogAggregationReport>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -193,6 +202,10 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
LOG.info("Initialized nodemanager for " + nodeId + ":" +
|
LOG.info("Initialized nodemanager for " + nodeId + ":" +
|
||||||
" physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb +
|
" physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb +
|
||||||
" virtual-cores=" + virtualCores);
|
" virtual-cores=" + virtualCores);
|
||||||
|
|
||||||
|
this.logAggregationEnabled =
|
||||||
|
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
|
||||||
|
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -649,6 +662,18 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
NodeStatusUpdaterImpl.this.context
|
NodeStatusUpdaterImpl.this.context
|
||||||
.getNMTokenSecretManager().getCurrentKey(),
|
.getNMTokenSecretManager().getCurrentKey(),
|
||||||
nodeLabelsForHeartbeat);
|
nodeLabelsForHeartbeat);
|
||||||
|
|
||||||
|
if (logAggregationEnabled) {
|
||||||
|
// pull log aggregation status for application running in this NM
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationReports =
|
||||||
|
getLogAggregationReportsForApps(context
|
||||||
|
.getLogAggregationStatusForApps());
|
||||||
|
if (logAggregationReports != null
|
||||||
|
&& !logAggregationReports.isEmpty()) {
|
||||||
|
request.setLogAggregationReportsForApps(logAggregationReports);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
response = resourceTracker.nodeHeartbeat(request);
|
response = resourceTracker.nodeHeartbeat(request);
|
||||||
//get next heartbeat interval from response
|
//get next heartbeat interval from response
|
||||||
nextHeartBeatInterval = response.getNextHeartBeatInterval();
|
nextHeartBeatInterval = response.getNextHeartBeatInterval();
|
||||||
|
@ -698,6 +723,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
removeOrTrackCompletedContainersFromContext(response
|
removeOrTrackCompletedContainersFromContext(response
|
||||||
.getContainersToBeRemovedFromNM());
|
.getContainersToBeRemovedFromNM());
|
||||||
|
|
||||||
|
logAggregationReportForAppsTempList.clear();
|
||||||
lastHeartbeatID = response.getResponseId();
|
lastHeartbeatID = response.getResponseId();
|
||||||
List<ContainerId> containersToCleanup = response
|
List<ContainerId> containersToCleanup = response
|
||||||
.getContainersToCleanup();
|
.getContainersToCleanup();
|
||||||
|
@ -783,5 +809,47 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
statusUpdater.start();
|
statusUpdater.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Map<ApplicationId, LogAggregationReport>
|
||||||
|
getLogAggregationReportsForApps(
|
||||||
|
ConcurrentLinkedQueue<LogAggregationReport> lastestLogAggregationStatus) {
|
||||||
|
Map<ApplicationId, LogAggregationReport> latestLogAggregationReports =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
LogAggregationReport status;
|
||||||
|
while ((status = lastestLogAggregationStatus.poll()) != null) {
|
||||||
|
this.logAggregationReportForAppsTempList.add(status);
|
||||||
|
}
|
||||||
|
for (LogAggregationReport logAggregationReport
|
||||||
|
: this.logAggregationReportForAppsTempList) {
|
||||||
|
LogAggregationReport report = null;
|
||||||
|
if (latestLogAggregationReports.containsKey(logAggregationReport
|
||||||
|
.getApplicationId())) {
|
||||||
|
report =
|
||||||
|
latestLogAggregationReports.get(logAggregationReport
|
||||||
|
.getApplicationId());
|
||||||
|
report.setLogAggregationStatus(logAggregationReport
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
String message = report.getDiagnosticMessage();
|
||||||
|
if (logAggregationReport.getDiagnosticMessage() != null
|
||||||
|
&& !logAggregationReport.getDiagnosticMessage().isEmpty()) {
|
||||||
|
if (message != null) {
|
||||||
|
message += logAggregationReport.getDiagnosticMessage();
|
||||||
|
} else {
|
||||||
|
message = logAggregationReport.getDiagnosticMessage();
|
||||||
|
}
|
||||||
|
report.setDiagnosticMessage(message);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
report = Records.newRecord(LogAggregationReport.class);
|
||||||
|
report.setApplicationId(logAggregationReport.getApplicationId());
|
||||||
|
report.setNodeId(this.nodeId);
|
||||||
|
report.setLogAggregationStatus(logAggregationReport
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
report
|
||||||
|
.setDiagnosticMessage(logAggregationReport.getDiagnosticMessage());
|
||||||
|
}
|
||||||
|
latestLogAggregationReports.put(logAggregationReport.getApplicationId(),
|
||||||
|
report);
|
||||||
|
}
|
||||||
|
return latestLogAggregationReports;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,12 +57,16 @@ import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
|
||||||
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogWriter;
|
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogWriter;
|
||||||
import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy;
|
import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy;
|
||||||
import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
|
import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
|
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
|
||||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||||
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
import org.apache.hadoop.yarn.util.Times;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Predicate;
|
import com.google.common.base.Predicate;
|
||||||
|
@ -120,6 +124,8 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
// This variable is only for testing
|
// This variable is only for testing
|
||||||
private final AtomicBoolean waiting = new AtomicBoolean(false);
|
private final AtomicBoolean waiting = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
private boolean renameTemporaryLogFileFailed = false;
|
||||||
|
|
||||||
private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators =
|
private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators =
|
||||||
new HashMap<ContainerId, ContainerLogAggregator>();
|
new HashMap<ContainerId, ContainerLogAggregator>();
|
||||||
|
|
||||||
|
@ -292,12 +298,14 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long currentTime = System.currentTimeMillis();
|
||||||
final Path renamedPath = this.rollingMonitorInterval <= 0
|
final Path renamedPath = this.rollingMonitorInterval <= 0
|
||||||
? remoteNodeLogFileForApp : new Path(
|
? remoteNodeLogFileForApp : new Path(
|
||||||
remoteNodeLogFileForApp.getParent(),
|
remoteNodeLogFileForApp.getParent(),
|
||||||
remoteNodeLogFileForApp.getName() + "_"
|
remoteNodeLogFileForApp.getName() + "_"
|
||||||
+ System.currentTimeMillis());
|
+ currentTime);
|
||||||
|
|
||||||
|
String diagnosticMessage = "";
|
||||||
final boolean rename = uploadedLogsInThisCycle;
|
final boolean rename = uploadedLogsInThisCycle;
|
||||||
try {
|
try {
|
||||||
userUgi.doAs(new PrivilegedExceptionAction<Object>() {
|
userUgi.doAs(new PrivilegedExceptionAction<Object>() {
|
||||||
|
@ -314,12 +322,36 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
diagnosticMessage =
|
||||||
|
"Log uploaded successfully for Application: " + appId
|
||||||
|
+ " in NodeManager: "
|
||||||
|
+ LogAggregationUtils.getNodeString(nodeId) + " at "
|
||||||
|
+ Times.format(currentTime) + "\n";
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error(
|
LOG.error(
|
||||||
"Failed to move temporary log file to final location: ["
|
"Failed to move temporary log file to final location: ["
|
||||||
+ remoteNodeTmpLogFileForApp + "] to ["
|
+ remoteNodeTmpLogFileForApp + "] to ["
|
||||||
+ renamedPath + "]", e);
|
+ renamedPath + "]", e);
|
||||||
|
diagnosticMessage =
|
||||||
|
"Log uploaded failed for Application: " + appId
|
||||||
|
+ " in NodeManager: "
|
||||||
|
+ LogAggregationUtils.getNodeString(nodeId) + " at "
|
||||||
|
+ Times.format(currentTime) + "\n";
|
||||||
|
renameTemporaryLogFileFailed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LogAggregationReport report =
|
||||||
|
Records.newRecord(LogAggregationReport.class);
|
||||||
|
report.setApplicationId(appId);
|
||||||
|
report.setNodeId(nodeId);
|
||||||
|
report.setDiagnosticMessage(diagnosticMessage);
|
||||||
|
if (appFinished) {
|
||||||
|
report.setLogAggregationStatus(renameTemporaryLogFileFailed
|
||||||
|
? LogAggregationStatus.FAILED : LogAggregationStatus.FINISHED);
|
||||||
|
} else {
|
||||||
|
report.setLogAggregationStatus(LogAggregationStatus.RUNNING);
|
||||||
|
}
|
||||||
|
this.context.getLogAggregationStatusForApps().add(report);
|
||||||
} finally {
|
} finally {
|
||||||
if (writer != null) {
|
if (writer != null) {
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
|
@ -458,10 +458,16 @@ public class ResourceTrackerService extends AbstractService implements
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. Send status to RMNode, saving the latest response.
|
// 4. Send status to RMNode, saving the latest response.
|
||||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
RMNodeStatusEvent nodeStatusEvent =
|
||||||
new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(),
|
new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(),
|
||||||
remoteNodeStatus.getContainersStatuses(),
|
remoteNodeStatus.getContainersStatuses(),
|
||||||
remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse));
|
remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse);
|
||||||
|
if (request.getLogAggregationReportsForApps() != null
|
||||||
|
&& !request.getLogAggregationReportsForApps().isEmpty()) {
|
||||||
|
nodeStatusEvent.setLogAggregationReportsForApps(request
|
||||||
|
.getLogAggregationReportsForApps());
|
||||||
|
}
|
||||||
|
this.rmContext.getDispatcher().getEventHandler().handle(nodeStatusEvent);
|
||||||
|
|
||||||
// 5. Update node's labels to RM's NodeLabelManager.
|
// 5. Update node's labels to RM's NodeLabelManager.
|
||||||
if (isDistributesNodeLabelsConf && request.getNodeLabels() != null) {
|
if (isDistributesNodeLabelsConf && request.getNodeLabels() != null) {
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ReservationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
|
|
||||||
|
@ -242,4 +243,6 @@ public interface RMApp extends EventHandler<RMAppEvent> {
|
||||||
ReservationId getReservationId();
|
ReservationId getReservationId();
|
||||||
|
|
||||||
ResourceRequest getAMResourceRequest();
|
ResourceRequest getAMResourceRequest();
|
||||||
|
|
||||||
|
Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp();
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,9 +25,11 @@ import java.nio.ByteBuffer;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.ConcurrentSkipListSet;
|
import java.util.concurrent.ConcurrentSkipListSet;
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
@ -61,6 +63,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
|
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
|
||||||
|
@ -142,6 +146,12 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
new AppFinishedTransition();
|
new AppFinishedTransition();
|
||||||
private Set<NodeId> ranNodes = new ConcurrentSkipListSet<NodeId>();
|
private Set<NodeId> ranNodes = new ConcurrentSkipListSet<NodeId>();
|
||||||
|
|
||||||
|
private final boolean logAggregationEnabled;
|
||||||
|
private long logAggregationStartTime = 0;
|
||||||
|
private final long logAggregationStatusTimeout;
|
||||||
|
private final Map<NodeId, LogAggregationReport> logAggregationStatus =
|
||||||
|
new HashMap<NodeId, LogAggregationReport>();
|
||||||
|
|
||||||
// These states stored are only valid when app is at killing or final_saving.
|
// These states stored are only valid when app is at killing or final_saving.
|
||||||
private RMAppState stateBeforeKilling;
|
private RMAppState stateBeforeKilling;
|
||||||
private RMAppState stateBeforeFinalSaving;
|
private RMAppState stateBeforeFinalSaving;
|
||||||
|
@ -413,6 +423,19 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
|
|
||||||
rmContext.getRMApplicationHistoryWriter().applicationStarted(this);
|
rmContext.getRMApplicationHistoryWriter().applicationStarted(this);
|
||||||
rmContext.getSystemMetricsPublisher().appCreated(this, startTime);
|
rmContext.getSystemMetricsPublisher().appCreated(this, startTime);
|
||||||
|
|
||||||
|
long localLogAggregationStatusTimeout =
|
||||||
|
conf.getLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS,
|
||||||
|
YarnConfiguration.DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS);
|
||||||
|
if (localLogAggregationStatusTimeout <= 0) {
|
||||||
|
this.logAggregationStatusTimeout =
|
||||||
|
YarnConfiguration.DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS;
|
||||||
|
} else {
|
||||||
|
this.logAggregationStatusTimeout = localLogAggregationStatusTimeout;
|
||||||
|
}
|
||||||
|
this.logAggregationEnabled =
|
||||||
|
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
|
||||||
|
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -803,6 +826,12 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
|
|
||||||
// otherwise, add it to ranNodes for further process
|
// otherwise, add it to ranNodes for further process
|
||||||
app.ranNodes.add(nodeAddedEvent.getNodeId());
|
app.ranNodes.add(nodeAddedEvent.getNodeId());
|
||||||
|
|
||||||
|
app.logAggregationStatus.put(nodeAddedEvent.getNodeId(),
|
||||||
|
LogAggregationReport.newInstance(app.applicationId, nodeAddedEvent
|
||||||
|
.getNodeId(), app.logAggregationEnabled
|
||||||
|
? LogAggregationStatus.NOT_START : LogAggregationStatus.DISABLED,
|
||||||
|
""));
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1153,6 +1182,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
|
app.logAggregationStartTime = System.currentTimeMillis();
|
||||||
for (NodeId nodeId : app.getRanNodes()) {
|
for (NodeId nodeId : app.getRanNodes()) {
|
||||||
app.handler.handle(
|
app.handler.handle(
|
||||||
new RMNodeCleanAppEvent(nodeId, app.applicationId));
|
new RMNodeCleanAppEvent(nodeId, app.applicationId));
|
||||||
|
@ -1356,4 +1386,62 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
}
|
}
|
||||||
return credentials;
|
return credentials;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp() {
|
||||||
|
try {
|
||||||
|
this.readLock.lock();
|
||||||
|
Map<NodeId, LogAggregationReport> outputs =
|
||||||
|
new HashMap<NodeId, LogAggregationReport>();
|
||||||
|
outputs.putAll(logAggregationStatus);
|
||||||
|
for (Entry<NodeId, LogAggregationReport> output : outputs.entrySet()) {
|
||||||
|
if (!output.getValue().getLogAggregationStatus()
|
||||||
|
.equals(LogAggregationStatus.TIME_OUT)
|
||||||
|
&& !output.getValue().getLogAggregationStatus()
|
||||||
|
.equals(LogAggregationStatus.FINISHED)
|
||||||
|
&& isAppInFinalState(this)
|
||||||
|
&& System.currentTimeMillis() > this.logAggregationStartTime
|
||||||
|
+ this.logAggregationStatusTimeout) {
|
||||||
|
output.getValue().setLogAggregationStatus(
|
||||||
|
LogAggregationStatus.TIME_OUT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return outputs;
|
||||||
|
} finally {
|
||||||
|
this.readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void aggregateLogReport(NodeId nodeId, LogAggregationReport report) {
|
||||||
|
try {
|
||||||
|
this.writeLock.lock();
|
||||||
|
if (this.logAggregationEnabled) {
|
||||||
|
LogAggregationReport curReport = this.logAggregationStatus.get(nodeId);
|
||||||
|
if (curReport == null) {
|
||||||
|
this.logAggregationStatus.put(nodeId, report);
|
||||||
|
} else {
|
||||||
|
if (curReport.getLogAggregationStatus().equals(
|
||||||
|
LogAggregationStatus.TIME_OUT)) {
|
||||||
|
if (report.getLogAggregationStatus().equals(
|
||||||
|
LogAggregationStatus.FINISHED)) {
|
||||||
|
curReport.setLogAggregationStatus(report
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
curReport.setLogAggregationStatus(report.getLogAggregationStatus());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (report.getDiagnosticMessage() != null
|
||||||
|
&& !report.getDiagnosticMessage().isEmpty()) {
|
||||||
|
curReport
|
||||||
|
.setDiagnosticMessage(curReport.getDiagnosticMessage() == null
|
||||||
|
? report.getDiagnosticMessage() : curReport
|
||||||
|
.getDiagnosticMessage() + report.getDiagnosticMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
this.writeLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,8 @@ import java.util.ArrayList;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
|
@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
||||||
|
@ -56,6 +59,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEventType;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
|
||||||
|
@ -773,6 +777,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
rmNode.handleContainerStatus(statusEvent.getContainers());
|
rmNode.handleContainerStatus(statusEvent.getContainers());
|
||||||
|
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps =
|
||||||
|
statusEvent.getLogAggregationReportsForApps();
|
||||||
|
if (logAggregationReportsForApps != null
|
||||||
|
&& !logAggregationReportsForApps.isEmpty()) {
|
||||||
|
rmNode.handleLogAggregationStatus(logAggregationReportsForApps);
|
||||||
|
}
|
||||||
|
|
||||||
if(rmNode.nextHeartBeat) {
|
if(rmNode.nextHeartBeat) {
|
||||||
rmNode.nextHeartBeat = false;
|
rmNode.nextHeartBeat = false;
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
|
@ -903,4 +914,15 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void handleLogAggregationStatus(
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps) {
|
||||||
|
for (Entry<ApplicationId, LogAggregationReport> report :
|
||||||
|
logAggregationReportsForApps.entrySet()) {
|
||||||
|
RMApp rmApp = this.context.getRMApps().get(report.getKey());
|
||||||
|
if (rmApp != null) {
|
||||||
|
((RMAppImpl)rmApp).aggregateLogReport(this.nodeId, report.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,10 +19,12 @@
|
||||||
package org.apache.hadoop.yarn.server.resourcemanager.rmnode;
|
package org.apache.hadoop.yarn.server.resourcemanager.rmnode;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
||||||
|
|
||||||
|
@ -32,6 +34,7 @@ public class RMNodeStatusEvent extends RMNodeEvent {
|
||||||
private final List<ContainerStatus> containersCollection;
|
private final List<ContainerStatus> containersCollection;
|
||||||
private final NodeHeartbeatResponse latestResponse;
|
private final NodeHeartbeatResponse latestResponse;
|
||||||
private final List<ApplicationId> keepAliveAppIds;
|
private final List<ApplicationId> keepAliveAppIds;
|
||||||
|
private Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps;
|
||||||
|
|
||||||
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus,
|
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus,
|
||||||
List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds,
|
List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds,
|
||||||
|
@ -41,6 +44,19 @@ public class RMNodeStatusEvent extends RMNodeEvent {
|
||||||
this.containersCollection = collection;
|
this.containersCollection = collection;
|
||||||
this.keepAliveAppIds = keepAliveAppIds;
|
this.keepAliveAppIds = keepAliveAppIds;
|
||||||
this.latestResponse = latestResponse;
|
this.latestResponse = latestResponse;
|
||||||
|
this.logAggregationReportsForApps = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus,
|
||||||
|
List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds,
|
||||||
|
NodeHeartbeatResponse latestResponse,
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps) {
|
||||||
|
super(nodeId, RMNodeEventType.STATUS_UPDATE);
|
||||||
|
this.nodeHealthStatus = nodeHealthStatus;
|
||||||
|
this.containersCollection = collection;
|
||||||
|
this.keepAliveAppIds = keepAliveAppIds;
|
||||||
|
this.latestResponse = latestResponse;
|
||||||
|
this.logAggregationReportsForApps = logAggregationReportsForApps;
|
||||||
}
|
}
|
||||||
|
|
||||||
public NodeHealthStatus getNodeHealthStatus() {
|
public NodeHealthStatus getNodeHealthStatus() {
|
||||||
|
@ -58,4 +74,14 @@ public class RMNodeStatusEvent extends RMNodeEvent {
|
||||||
public List<ApplicationId> getKeepAliveAppIds() {
|
public List<ApplicationId> getKeepAliveAppIds() {
|
||||||
return this.keepAliveAppIds;
|
return this.keepAliveAppIds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Map<ApplicationId, LogAggregationReport>
|
||||||
|
getLogAggregationReportsForApps() {
|
||||||
|
return this.logAggregationReportsForApps;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLogAggregationReportsForApps(
|
||||||
|
Map<ApplicationId, LogAggregationReport> logAggregationReportsForApps) {
|
||||||
|
this.logAggregationReportsForApps = logAggregationReportsForApps;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.util.StringHelper.join;
|
||||||
|
import org.apache.hadoop.yarn.webapp.SubView;
|
||||||
|
import org.apache.hadoop.yarn.webapp.YarnWebParams;
|
||||||
|
|
||||||
|
public class AppLogAggregationStatusPage extends RmView{
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void preHead(Page.HTML<_> html) {
|
||||||
|
commonPreHead(html);
|
||||||
|
String appId = $(YarnWebParams.APPLICATION_ID);
|
||||||
|
set(
|
||||||
|
TITLE,
|
||||||
|
appId.isEmpty() ? "Bad request: missing application ID" : join(
|
||||||
|
"Application ", $(YarnWebParams.APPLICATION_ID)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Class<? extends SubView> content() {
|
||||||
|
return RMAppLogAggregationStatusBlock.class;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,148 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.util.StringHelper.join;
|
||||||
|
import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ID;
|
||||||
|
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
|
||||||
|
import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
|
import org.apache.hadoop.yarn.util.Apps;
|
||||||
|
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
|
||||||
|
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
|
||||||
|
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
|
||||||
|
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
|
||||||
|
public class RMAppLogAggregationStatusBlock extends HtmlBlock {
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory
|
||||||
|
.getLog(RMAppLogAggregationStatusBlock.class);
|
||||||
|
private final ResourceManager rm;
|
||||||
|
private final Configuration conf;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
RMAppLogAggregationStatusBlock(ViewContext ctx, ResourceManager rm,
|
||||||
|
Configuration conf) {
|
||||||
|
super(ctx);
|
||||||
|
this.rm = rm;
|
||||||
|
this.conf = conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void render(Block html) {
|
||||||
|
String aid = $(APPLICATION_ID);
|
||||||
|
if (aid.isEmpty()) {
|
||||||
|
puts("Bad request: requires Application ID");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ApplicationId appId;
|
||||||
|
try {
|
||||||
|
appId = Apps.toAppID(aid);
|
||||||
|
} catch (Exception e) {
|
||||||
|
puts("Invalid Application ID: " + aid);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setTitle(join("Application ", aid));
|
||||||
|
|
||||||
|
// Add LogAggregationStatus description table
|
||||||
|
// to explain the meaning of different LogAggregationStatus
|
||||||
|
DIV<Hamlet> div_description = html.div(_INFO_WRAP);
|
||||||
|
TABLE<DIV<Hamlet>> table_description =
|
||||||
|
div_description.table("#LogAggregationStatusDecription");
|
||||||
|
table_description.
|
||||||
|
tr().
|
||||||
|
th(_TH, "Log Aggregation Status").
|
||||||
|
th(_TH, "Description").
|
||||||
|
_();
|
||||||
|
table_description.tr().td(LogAggregationStatus.DISABLED.name())
|
||||||
|
.td("Log Aggregation is Disabled.")._();
|
||||||
|
table_description.tr().td(LogAggregationStatus.NOT_START.name())
|
||||||
|
.td("Log Aggregation does not Start.")._();
|
||||||
|
table_description.tr().td(LogAggregationStatus.RUNNING.name())
|
||||||
|
.td("Log Aggregation is Running.")._();
|
||||||
|
table_description.tr().td(LogAggregationStatus.FINISHED.name())
|
||||||
|
.td("Log Aggregation is Finished. All of the logs have been "
|
||||||
|
+ "aggregated successfully.")._();
|
||||||
|
table_description.tr().td(LogAggregationStatus.FAILED.name())
|
||||||
|
.td("Log Aggregation is Failed. At least one of the logs "
|
||||||
|
+ "have not been aggregated.")._();
|
||||||
|
table_description.tr().td(LogAggregationStatus.TIME_OUT.name())
|
||||||
|
.td("Does not get the Log aggregation status for a long time. "
|
||||||
|
+ "Not sure what is the current Log Aggregation Status.")._();
|
||||||
|
table_description._();
|
||||||
|
div_description._();
|
||||||
|
|
||||||
|
boolean logAggregationEnabled =
|
||||||
|
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
|
||||||
|
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED);
|
||||||
|
// Application Log aggregation status Table
|
||||||
|
DIV<Hamlet> div = html.div(_INFO_WRAP);
|
||||||
|
TABLE<DIV<Hamlet>> table =
|
||||||
|
div.h3(
|
||||||
|
"Log Aggregation: "
|
||||||
|
+ (logAggregationEnabled ? "Enabled" : "Disabled")).table(
|
||||||
|
"#LogAggregationStatus");
|
||||||
|
table.
|
||||||
|
tr().
|
||||||
|
th(_TH, "NodeId").
|
||||||
|
th(_TH, "Log Aggregation Status").
|
||||||
|
th(_TH, "Diagnostis Message").
|
||||||
|
_();
|
||||||
|
|
||||||
|
RMApp rmApp = rm.getRMContext().getRMApps().get(appId);
|
||||||
|
if (rmApp != null) {
|
||||||
|
Map<NodeId, LogAggregationReport> logAggregationReports =
|
||||||
|
rmApp.getLogAggregationReportsForApp();
|
||||||
|
if (logAggregationReports != null && !logAggregationReports.isEmpty()) {
|
||||||
|
for (Entry<NodeId, LogAggregationReport> report :
|
||||||
|
logAggregationReports.entrySet()) {
|
||||||
|
LogAggregationStatus status =
|
||||||
|
report.getValue() == null ? null : report.getValue()
|
||||||
|
.getLogAggregationStatus();
|
||||||
|
String message =
|
||||||
|
report.getValue() == null ? null : report.getValue()
|
||||||
|
.getDiagnosticMessage();
|
||||||
|
table.tr()
|
||||||
|
.td(report.getKey().toString())
|
||||||
|
.td(status == null ? "N/A" : status.toString())
|
||||||
|
.td(message == null ? "N/A" : message)._();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
table._();
|
||||||
|
div._();
|
||||||
|
}
|
||||||
|
}
|
|
@ -68,6 +68,8 @@ public class RMWebApp extends WebApp implements YarnWebParams {
|
||||||
"appattempt");
|
"appattempt");
|
||||||
route(pajoin("/container", CONTAINER_ID), RmController.class, "container");
|
route(pajoin("/container", CONTAINER_ID), RmController.class, "container");
|
||||||
route("/errors-and-warnings", RmController.class, "errorsAndWarnings");
|
route("/errors-and-warnings", RmController.class, "errorsAndWarnings");
|
||||||
|
route(pajoin("/logaggregationstatus", APPLICATION_ID),
|
||||||
|
RmController.class, "logaggregationstatus");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -109,4 +109,8 @@ public class RmController extends Controller {
|
||||||
public void errorsAndWarnings() {
|
public void errorsAndWarnings() {
|
||||||
render(RMErrorsAndWarningsPage.class);
|
render(RMErrorsAndWarningsPage.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void logaggregationstatus() {
|
||||||
|
render(AppLogAggregationStatusPage.class);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics;
|
||||||
|
@ -190,6 +191,11 @@ public abstract class MockAsm extends MockApps {
|
||||||
public ResourceRequest getAMResourceRequest() {
|
public ResourceRequest getAMResourceRequest() {
|
||||||
return this.amReq;
|
return this.amReq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp() {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static RMApp newApplication(int i) {
|
public static RMApp newApplication(int i) {
|
||||||
|
|
|
@ -0,0 +1,318 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.logaggregationstatus;
|
||||||
|
|
||||||
|
import static org.mockito.Matchers.any;
|
||||||
|
import static org.mockito.Mockito.doAnswer;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.LogAggregationStatus;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.mockito.invocation.InvocationOnMock;
|
||||||
|
import org.mockito.stubbing.Answer;
|
||||||
|
|
||||||
|
public class TestRMAppLogAggregationStatus {
|
||||||
|
|
||||||
|
private RMContext rmContext;
|
||||||
|
private YarnScheduler scheduler;
|
||||||
|
|
||||||
|
private SchedulerEventType eventType;
|
||||||
|
|
||||||
|
private ApplicationId appId;
|
||||||
|
|
||||||
|
|
||||||
|
private final class TestSchedulerEventDispatcher implements
|
||||||
|
EventHandler<SchedulerEvent> {
|
||||||
|
@Override
|
||||||
|
public void handle(SchedulerEvent event) {
|
||||||
|
scheduler.handle(event);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
InlineDispatcher rmDispatcher = new InlineDispatcher();
|
||||||
|
|
||||||
|
rmContext =
|
||||||
|
new RMContextImpl(rmDispatcher, null, null, null,
|
||||||
|
null, null, null, null, null,
|
||||||
|
new RMApplicationHistoryWriter());
|
||||||
|
rmContext.setSystemMetricsPublisher(new SystemMetricsPublisher());
|
||||||
|
|
||||||
|
scheduler = mock(YarnScheduler.class);
|
||||||
|
doAnswer(
|
||||||
|
new Answer<Void>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Void answer(InvocationOnMock invocation) throws Throwable {
|
||||||
|
final SchedulerEvent event = (SchedulerEvent)(invocation.getArguments()[0]);
|
||||||
|
eventType = event.getType();
|
||||||
|
if (eventType == SchedulerEventType.NODE_UPDATE) {
|
||||||
|
//DO NOTHING
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
).when(scheduler).handle(any(SchedulerEvent.class));
|
||||||
|
|
||||||
|
rmDispatcher.register(SchedulerEventType.class,
|
||||||
|
new TestSchedulerEventDispatcher());
|
||||||
|
|
||||||
|
appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLogAggregationStatus() throws Exception {
|
||||||
|
YarnConfiguration conf = new YarnConfiguration();
|
||||||
|
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
|
||||||
|
conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
|
||||||
|
RMApp rmApp = createRMApp(conf);
|
||||||
|
this.rmContext.getRMApps().put(appId, rmApp);
|
||||||
|
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
|
||||||
|
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
|
||||||
|
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
|
||||||
|
|
||||||
|
// This application will be running on two nodes
|
||||||
|
NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
|
||||||
|
Resource capability = Resource.newInstance(4096, 4);
|
||||||
|
RMNodeImpl node1 =
|
||||||
|
new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
|
||||||
|
node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
|
||||||
|
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
|
||||||
|
|
||||||
|
NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
|
||||||
|
RMNodeImpl node2 =
|
||||||
|
new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
|
||||||
|
node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
|
||||||
|
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
|
||||||
|
|
||||||
|
// The initial log aggregation status for these two nodes
|
||||||
|
// should be NOT_START
|
||||||
|
Map<NodeId, LogAggregationReport> logAggregationStatus =
|
||||||
|
rmApp.getLogAggregationReportsForApp();
|
||||||
|
Assert.assertEquals(2, logAggregationStatus.size());
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
|
||||||
|
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
|
||||||
|
.entrySet()) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.NOT_START, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<ApplicationId, LogAggregationReport> node1ReportForApp =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
String messageForNode1_1 =
|
||||||
|
"node1 logAggregation status updated at " + System.currentTimeMillis();
|
||||||
|
LogAggregationReport report1 =
|
||||||
|
LogAggregationReport.newInstance(appId, nodeId1,
|
||||||
|
LogAggregationStatus.RUNNING, messageForNode1_1);
|
||||||
|
node1ReportForApp.put(appId, report1);
|
||||||
|
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus
|
||||||
|
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
|
||||||
|
null, node1ReportForApp));
|
||||||
|
|
||||||
|
Map<ApplicationId, LogAggregationReport> node2ReportForApp =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
String messageForNode2_1 =
|
||||||
|
"node2 logAggregation status updated at " + System.currentTimeMillis();
|
||||||
|
LogAggregationReport report2 =
|
||||||
|
LogAggregationReport.newInstance(appId, nodeId2,
|
||||||
|
LogAggregationStatus.RUNNING, messageForNode2_1);
|
||||||
|
node2ReportForApp.put(appId, report2);
|
||||||
|
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), NodeHealthStatus
|
||||||
|
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
|
||||||
|
null, node2ReportForApp));
|
||||||
|
// node1 and node2 have updated their log aggregation status
|
||||||
|
// verify that the log aggregation status for node1, node2
|
||||||
|
// has been changed
|
||||||
|
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
|
||||||
|
Assert.assertEquals(2, logAggregationStatus.size());
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
|
||||||
|
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
|
||||||
|
.entrySet()) {
|
||||||
|
if (report.getKey().equals(node1.getNodeID())) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
Assert.assertEquals(messageForNode1_1, report.getValue()
|
||||||
|
.getDiagnosticMessage());
|
||||||
|
} else if (report.getKey().equals(node2.getNodeID())) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
Assert.assertEquals(messageForNode2_1, report.getValue()
|
||||||
|
.getDiagnosticMessage());
|
||||||
|
} else {
|
||||||
|
// should not contain log aggregation report for other nodes
|
||||||
|
Assert
|
||||||
|
.fail("should not contain log aggregation report for other nodes");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// node1 updates its log aggregation status again
|
||||||
|
Map<ApplicationId, LogAggregationReport> node1ReportForApp2 =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
String messageForNode1_2 =
|
||||||
|
"node1 logAggregation status updated at " + System.currentTimeMillis();
|
||||||
|
LogAggregationReport report1_2 =
|
||||||
|
LogAggregationReport.newInstance(appId, nodeId1,
|
||||||
|
LogAggregationStatus.RUNNING, messageForNode1_2);
|
||||||
|
node1ReportForApp2.put(appId, report1_2);
|
||||||
|
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus
|
||||||
|
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
|
||||||
|
null, node1ReportForApp2));
|
||||||
|
|
||||||
|
// verify that the log aggregation status for node1
|
||||||
|
// has been changed
|
||||||
|
// verify that the log aggregation status for node2
|
||||||
|
// does not change
|
||||||
|
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
|
||||||
|
Assert.assertEquals(2, logAggregationStatus.size());
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
|
||||||
|
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
|
||||||
|
.entrySet()) {
|
||||||
|
if (report.getKey().equals(node1.getNodeID())) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
Assert.assertEquals(messageForNode1_1 + messageForNode1_2, report
|
||||||
|
.getValue().getDiagnosticMessage());
|
||||||
|
} else if (report.getKey().equals(node2.getNodeID())) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
Assert.assertEquals(messageForNode2_1, report.getValue()
|
||||||
|
.getDiagnosticMessage());
|
||||||
|
} else {
|
||||||
|
// should not contain log aggregation report for other nodes
|
||||||
|
Assert
|
||||||
|
.fail("should not contain log aggregation report for other nodes");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// kill the application
|
||||||
|
rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
|
||||||
|
rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
|
||||||
|
rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
|
||||||
|
Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
|
||||||
|
|
||||||
|
// wait for 1500 ms
|
||||||
|
Thread.sleep(1500);
|
||||||
|
|
||||||
|
// the log aggregation status for both nodes should be changed
|
||||||
|
// to TIME_OUT
|
||||||
|
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
|
||||||
|
Assert.assertEquals(2, logAggregationStatus.size());
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
|
||||||
|
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
|
||||||
|
.entrySet()) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally, node1 finished its log aggregation and sent out its final
|
||||||
|
// log aggregation status. The log aggregation status for node1 should
|
||||||
|
// be changed from TIME_OUT to FINISHED
|
||||||
|
Map<ApplicationId, LogAggregationReport> node1ReportForApp3 =
|
||||||
|
new HashMap<ApplicationId, LogAggregationReport>();
|
||||||
|
String messageForNode1_3 =
|
||||||
|
"node1 final logAggregation status updated at "
|
||||||
|
+ System.currentTimeMillis();
|
||||||
|
LogAggregationReport report1_3 =
|
||||||
|
LogAggregationReport.newInstance(appId, nodeId1,
|
||||||
|
LogAggregationStatus.FINISHED, messageForNode1_3);
|
||||||
|
node1ReportForApp3.put(appId, report1_3);
|
||||||
|
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus
|
||||||
|
.newInstance(true, null, 0), new ArrayList<ContainerStatus>(), null,
|
||||||
|
null, node1ReportForApp3));
|
||||||
|
|
||||||
|
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
|
||||||
|
Assert.assertEquals(2, logAggregationStatus.size());
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
|
||||||
|
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
|
||||||
|
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus
|
||||||
|
.entrySet()) {
|
||||||
|
if (report.getKey().equals(node1.getNodeID())) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.FINISHED, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
Assert.assertEquals(messageForNode1_1 + messageForNode1_2
|
||||||
|
+ messageForNode1_3, report.getValue().getDiagnosticMessage());
|
||||||
|
} else if (report.getKey().equals(node2.getNodeID())) {
|
||||||
|
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue()
|
||||||
|
.getLogAggregationStatus());
|
||||||
|
} else {
|
||||||
|
// should not contain log aggregation report for other nodes
|
||||||
|
Assert
|
||||||
|
.fail("should not contain log aggregation report for other nodes");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private RMApp createRMApp(Configuration conf) {
|
||||||
|
ApplicationSubmissionContext submissionContext =
|
||||||
|
ApplicationSubmissionContext.newInstance(appId, "test", "default",
|
||||||
|
Priority.newInstance(0), null, false, true,
|
||||||
|
2, Resource.newInstance(10, 2), "test");
|
||||||
|
return new RMAppImpl(this.appId, this.rmContext,
|
||||||
|
conf, "test", "test", "default", submissionContext,
|
||||||
|
this.rmContext.getScheduler(),
|
||||||
|
this.rmContext.getApplicationMasterService(),
|
||||||
|
System.currentTimeMillis(), "test",
|
||||||
|
null, null);
|
||||||
|
}
|
||||||
|
}
|
|
@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
|
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
|
|
||||||
|
@ -271,4 +272,9 @@ public class MockRMApp implements RMApp {
|
||||||
public ResourceRequest getAMResourceRequest() {
|
public ResourceRequest getAMResourceRequest() {
|
||||||
return this.amReq;
|
return this.amReq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<NodeId, LogAggregationReport> getLogAggregationReportsForApp() {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue