From 9119b3cf8f883aa2d5df534afc0c50249fed03c6 Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Thu, 14 Jun 2018 12:38:10 +0530 Subject: [PATCH] YARN-8155. Improve ATSv2 client logging in RM and NM publisher. Contributed by Abhishek Modi. --- .../timelineservice/NMTimelinePublisher.java | 42 ++++++++++++++++--- .../metrics/TimelineServiceV2Publisher.java | 8 +++- .../TimelineCollectorWebService.java | 19 ++++++--- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java index f4517262ce0..cbf3e5eb7f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java @@ -190,9 +190,20 @@ public void reportContainerResourceUsage(Container container, Long pmemUsage, LOG.error("Seems like client has been removed before the container" + " metric could be published for " + container.getContainerId()); } - } catch (IOException | YarnException e) { + } catch (IOException e) { LOG.error("Failed to publish Container metrics for container " - + container.getContainerId(), e); + + container.getContainerId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } + } catch (YarnException e) { + LOG.error("Failed to publish Container metrics for container " + + container.getContainerId(), e.getMessage()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } } } } @@ -284,9 +295,20 @@ private void publishContainerLocalizationEvent( LOG.error("Seems like client has been removed before the event could be" + " published for " + container.getContainerId()); } - } catch (IOException | YarnException e) { + } catch (IOException e) { LOG.error("Failed to publish Container metrics for container " - + container.getContainerId(), e); + + container.getContainerId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } + } catch (YarnException e) { + LOG.error("Failed to publish Container metrics for container " + + container.getContainerId(), e.getMessage()); + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to publish Container metrics for container " + + container.getContainerId(), e); + } } } @@ -315,8 +337,16 @@ private void putEntity(TimelineEntity entity, ApplicationId appId) { LOG.error("Seems like client has been removed before the entity " + "could be published for " + entity); } - } catch (Exception e) { - LOG.error("Error when publishing entity " + entity, e); + } catch (IOException e) { + LOG.error("Error when publishing entity " + entity); + if (LOG.isDebugEnabled()) { + LOG.debug("Error when publishing entity " + entity, e); + } + } catch (YarnException e) { + LOG.error("Error when publishing entity " + entity, e.getMessage()); + if (LOG.isDebugEnabled()) { + LOG.debug("Error when publishing entity " + entity, e); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java index ea286a057e5..89905e51cb0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TimelineServiceV2Publisher.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.metrics; +import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -457,8 +458,11 @@ private void putEntity(TimelineEntity entity, ApplicationId appId) { entities.addEntity(entity); timelineCollector.putEntities(entities, UserGroupInformation.getCurrentUser()); - } catch (Exception e) { - LOG.error("Error when publishing entity " + entity, e); + } catch (IOException e) { + LOG.error("Error when publishing entity " + entity); + if (LOG.isDebugEnabled()) { + LOG.debug("Error when publishing entity " + entity, e); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java index 61dcf9972ca..b33a0f03526 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/TimelineCollectorWebService.java @@ -61,6 +61,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.IllegalFormatException; + /** * The main per-node REST end point for timeline service writes. It is * essentially a container service that routes requests to the appropriate @@ -166,7 +169,7 @@ public Response putEntities( TimelineCollector collector = collectorManager.get(appID); if (collector == null) { LOG.error("Application: "+ appId + " is not found"); - throw new NotFoundException(); // different exception? + throw new NotFoundException("Application: "+ appId + " is not found"); } boolean isAsync = async != null && async.trim().equalsIgnoreCase("true"); @@ -179,7 +182,10 @@ public Response putEntities( } return Response.ok().build(); - } catch (Exception e) { + } catch (NotFoundException | ForbiddenException e) { + throw new WebApplicationException(e, + Response.Status.INTERNAL_SERVER_ERROR); + } catch (IOException e) { LOG.error("Error putting entities", e); throw new WebApplicationException(e, Response.Status.INTERNAL_SERVER_ERROR); @@ -221,14 +227,17 @@ public Response putDomain( TimelineCollector collector = collectorManager.get(appID); if (collector == null) { LOG.error("Application: " + appId + " is not found"); - throw new NotFoundException(); // different exception? + throw new NotFoundException("Application: " + appId + " is not found"); } domain.setOwner(callerUgi.getShortUserName()); collector.putDomain(domain, callerUgi); return Response.ok().build(); - } catch (Exception e) { + } catch (NotFoundException e) { + throw new WebApplicationException(e, + Response.Status.INTERNAL_SERVER_ERROR); + } catch (IOException e) { LOG.error("Error putting entities", e); throw new WebApplicationException(e, Response.Status.INTERNAL_SERVER_ERROR); @@ -242,7 +251,7 @@ private static ApplicationId parseApplicationId(String appId) { } else { return null; } - } catch (Exception e) { + } catch (IllegalFormatException e) { LOG.error("Invalid application ID: " + appId); return null; }