YARN-4596. SystemMetricsPublisher should not swallow error messages from TimelineClient#putEntities. Contributed by Li Lu.

This commit is contained in:
Jian He 2016-01-18 16:58:39 -08:00
parent 62d6166211
commit 6db022fc48
3 changed files with 38 additions and 4 deletions

View File

@ -1222,6 +1222,9 @@ Release 2.8.0 - UNRELEASED
YARN-4502. Fix two AM containers get allocated when AM restart. YARN-4502. Fix two AM containers get allocated when AM restart.
(Vinod Kumar Vavilapalli via wangda) (Vinod Kumar Vavilapalli via wangda)
YARN-4596. SystemMetricsPublisher should not swallow error messages from
TimelineClient#putEntities. (Li Lu via jianhe)
Release 2.7.3 - UNRELEASED Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -1140,7 +1140,8 @@ public class ApplicationMaster {
ugi.doAs(new PrivilegedExceptionAction<TimelinePutResponse>() { ugi.doAs(new PrivilegedExceptionAction<TimelinePutResponse>() {
@Override @Override
public TimelinePutResponse run() throws Exception { public TimelinePutResponse run() throws Exception {
return timelineClient.putEntities(entity); return processTimelineResponseErrors(
timelineClient.putEntities(entity));
} }
}); });
} catch (Exception e) { } catch (Exception e) {
@ -1165,7 +1166,8 @@ public class ApplicationMaster {
event.addEventInfo("Exit Status", container.getExitStatus()); event.addEventInfo("Exit Status", container.getExitStatus());
entity.addEvent(event); entity.addEvent(event);
try { try {
timelineClient.putEntities(entity); TimelinePutResponse response = timelineClient.putEntities(entity);
processTimelineResponseErrors(response);
} catch (YarnException | IOException e) { } catch (YarnException | IOException e) {
LOG.error("Container end event could not be published for " LOG.error("Container end event could not be published for "
+ container.getContainerId().toString(), e); + container.getContainerId().toString(), e);
@ -1185,7 +1187,8 @@ public class ApplicationMaster {
event.setTimestamp(System.currentTimeMillis()); event.setTimestamp(System.currentTimeMillis());
entity.addEvent(event); entity.addEvent(event);
try { try {
timelineClient.putEntities(entity); TimelinePutResponse response = timelineClient.putEntities(entity);
processTimelineResponseErrors(response);
} catch (YarnException | IOException e) { } catch (YarnException | IOException e) {
LOG.error("App Attempt " LOG.error("App Attempt "
+ (appEvent.equals(DSEvent.DS_APP_ATTEMPT_START) ? "start" : "end") + (appEvent.equals(DSEvent.DS_APP_ATTEMPT_START) ? "start" : "end")
@ -1194,6 +1197,22 @@ public class ApplicationMaster {
} }
} }
/**
 * Reports the per-entity errors carried by a timeline put response.
 *
 * <p>When the response carries no errors, a single debug message is logged.
 * Otherwise one error-level message is logged per reported error, naming the
 * entity type, the entity id and the error code taken from that error.
 *
 * @param response the response obtained from a timeline put call
 * @return the same {@code response} instance, so the call can be used inline
 */
private static TimelinePutResponse processTimelineResponseErrors(
    TimelinePutResponse response) {
  List<TimelinePutResponse.TimelinePutError> putErrors = response.getErrors();

  // Nothing was reported: note it at debug level and hand the response back.
  if (putErrors.isEmpty()) {
    LOG.debug("Timeline entities are successfully put");
    return response;
  }

  // One log line per reported error so that no failure is silently dropped.
  for (TimelinePutResponse.TimelinePutError putError : putErrors) {
    StringBuilder message =
        new StringBuilder("Error when publishing entity [");
    message.append(putError.getEntityType());
    message.append(",");
    message.append(putError.getEntityId());
    message.append("], server side error code: ");
    message.append(putError.getErrorCode());
    LOG.error(message.toString());
  }
  return response;
}
RMCallbackHandler getRMCallbackHandler() { RMCallbackHandler getRMCallbackHandler() {
return new RMCallbackHandler(); return new RMCallbackHandler();
} }

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent; import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.client.api.TimelineClient; import org.apache.hadoop.yarn.client.api.TimelineClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.AsyncDispatcher;
@ -500,7 +501,18 @@ public class SystemMetricsPublisher extends CompositeService {
LOG.debug("Publishing the entity " + entity.getEntityId() + LOG.debug("Publishing the entity " + entity.getEntityId() +
", JSON-style content: " + TimelineUtils.dumpTimelineRecordtoJSON(entity)); ", JSON-style content: " + TimelineUtils.dumpTimelineRecordtoJSON(entity));
} }
client.putEntities(entity); TimelinePutResponse response = client.putEntities(entity);
List<TimelinePutResponse.TimelinePutError> errors = response.getErrors();
if (errors.size() == 0) {
LOG.debug("Timeline entities are successfully put");
} else {
for (TimelinePutResponse.TimelinePutError error : errors) {
LOG.error(
"Error when publishing entity [" + error.getEntityType() + ","
+ error.getEntityId() + "], server side error code: "
+ error.getErrorCode());
}
}
} catch (Exception e) { } catch (Exception e) {
LOG.error("Error when publishing entity [" + entity.getEntityType() + "," LOG.error("Error when publishing entity [" + entity.getEntityType() + ","
+ entity.getEntityId() + "]", e); + entity.getEntityId() + "]", e);