From 21e416ad27a6e23ac77ead8f79440df841387af3 Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Wed, 1 Aug 2018 22:25:53 +0530 Subject: [PATCH] YARN-8155. Improve ATSv2 client logging in RM and NM publisher. Contributed by Abhishek Modi. --- .../timelineservice/NMTimelinePublisher.java | 43 ++++++++++++++++--- .../metrics/TimelineServiceV2Publisher.java | 8 +++- .../TimelineCollectorWebService.java | 13 ++++-- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java index bba56703b6a..08e36514be7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java @@ -189,9 +189,20 @@ public void reportContainerResourceUsage(Container container, Long pmemUsage, LOG.error("Seems like client has been removed before the container" + " metric could be published for " + container.getContainerId()); } - } catch (IOException | YarnException e) { + } catch (IOException e) { LOG.error("Failed to publish Container metrics for container " - + container.getContainerId(), e); + + container.getContainerId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } + } catch (YarnException e) { + LOG.error("Failed to publish Container metrics for container " + + container.getContainerId() + " Error: " + e.getMessage()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } } } } @@ -283,9 +294,20 @@ private void publishContainerLocalizationEvent( LOG.error("Seems like client has been removed before the event could be" + " published for " + container.getContainerId()); } - } catch (IOException | YarnException e) { + } catch (IOException e) { LOG.error("Failed to publish Container metrics for container " - + container.getContainerId(), e); + + container.getContainerId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } + } catch (YarnException e) { + LOG.error("Failed to publish Container metrics for container " + + container.getContainerId() + " Error: " + e.getMessage()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } } } @@ -314,8 +336,17 @@ private void putEntity(TimelineEntity entity, ApplicationId appId) { LOG.error("Seems like client has been removed before the entity " + "could be published for " + entity); } - } catch (Exception e) { - LOG.error("Error when publishing entity " + entity, e); + } catch (IOException e) { + LOG.error("Error when publishing entity " + entity); + if (LOG.isDebugEnabled()) { + LOG.debug("Error when publishing entity " + entity, e); + } + } catch (YarnException e) { + LOG.error("Error when publishing entity " + entity + " Error: " + + e.getMessage()); + if (LOG.isDebugEnabled()) { + LOG.debug("Error when publishing entity " + entity, e); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java index 8acabda3c68..5cba43bf852 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.metrics; +import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -453,8 +454,11 @@ private void putEntity(TimelineEntity entity, ApplicationId appId) { entities.addEntity(entity); timelineCollector.putEntities(entities, UserGroupInformation.getCurrentUser()); - } catch (Exception e) { - LOG.error("Error when publishing entity " + entity, e); + } catch (IOException e) { + LOG.error("Error when publishing entity " + entity); + if (LOG.isDebugEnabled()) { + LOG.debug("Error when publishing entity " + entity, e); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java index e50e4facdef..e0a1016532c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java @@ -59,6 +59,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.IllegalFormatException; + /** * The main per-node REST end point for timeline service writes. It is * essentially a container service that routes requests to the appropriate @@ -163,7 +166,7 @@ public Response putEntities( TimelineCollector collector = collectorManager.get(appID); if (collector == null) { LOG.error("Application: "+ appId + " is not found"); - throw new NotFoundException(); // different exception? + throw new NotFoundException("Application: "+ appId + " is not found"); } boolean isAsync = async != null && async.trim().equalsIgnoreCase("true"); @@ -176,13 +179,17 @@ public Response putEntities( } return Response.ok().build(); - } catch (Exception e) { + } catch (NotFoundException | ForbiddenException e) { + throw new WebApplicationException(e, + Response.Status.INTERNAL_SERVER_ERROR); + } catch (IOException e) { LOG.error("Error putting entities", e); throw new WebApplicationException(e, Response.Status.INTERNAL_SERVER_ERROR); } } + private static ApplicationId parseApplicationId(String appId) { try { if (appId != null) { @@ -190,7 +197,7 @@ private static ApplicationId parseApplicationId(String appId) { } else { return null; } - } catch (Exception e) { + } catch (IllegalFormatException e) { LOG.error("Invalid application ID: " + appId); return null; }