From 588c22e5a08f65094543092ad56a6a47be2f6131 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Fri, 5 Jun 2015 22:38:31 +0000 Subject: [PATCH] MAPREDUCE-6354. ShuffleHandler should be able to log shuffle connections. Contributed by Chang Li (cherry picked from commit b3ffa870034d06608a1946e2d9ce7dbd535a2c53) --- .../src/main/conf/log4j.properties | 21 ++++++++++++------- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../apache/hadoop/mapred/ShuffleHandler.java | 15 +++++++++++-- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties index 3a0a3adb82f..dcffead6c43 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties @@ -67,7 +67,7 @@ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n # # console -# Add "console" to rootlogger above if you want to use this +# Add "console" to rootlogger above if you want to use this # log4j.appender.console=org.apache.log4j.ConsoleAppender @@ -110,7 +110,7 @@ hadoop.security.log.maxfilesize=256MB hadoop.security.log.maxbackupindex=20 log4j.category.SecurityLogger=${hadoop.security.logger} hadoop.security.log.file=SecurityAuth-${user.name}.audit -log4j.appender.RFAS=org.apache.log4j.RollingFileAppender +log4j.appender.RFAS=org.apache.log4j.RollingFileAppender log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n @@ -120,7 +120,7 @@ log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} # # Daily Rolling Security appender # -log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 
log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n @@ -184,9 +184,9 @@ log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter # -# Job Summary Appender +# Job Summary Appender # -# Use following logger to send summary to separate file defined by +# Use following logger to send summary to separate file defined by # hadoop.mapreduce.jobsummary.log.file : # hadoop.mapreduce.jobsummary.logger=INFO,JSA # @@ -204,7 +204,12 @@ log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduc log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false # -# Yarn ResourceManager Application Summary Log +# Shuffle connection log from ShuffleHandler +# Uncomment the following line to enable logging of shuffle connections +# log4j.logger.org.apache.hadoop.mapred.ShuffleHandler.audit=DEBUG + +# +# Yarn ResourceManager Application Summary Log # # Set the ResourceManager summary log filename yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log @@ -212,8 +217,8 @@ yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY -# To enable AppSummaryLogging for the RM, -# set yarn.server.resourcemanager.appsummary.logger to +# To enable AppSummaryLogging for the RM, +# set yarn.server.resourcemanager.appsummary.logger to # <LEVEL>,RMSUMMARY in hadoop-env.sh # Appender for ResourceManager Application Summary Log diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 565ace4f149..1ed0bf54dcc 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -79,6 +79,9 @@ Release 2.8.0 - UNRELEASED MAPREDUCE-6383. Pi job (QuasiMonteCarlo) should not try to read the results file if its job fails. 
(Harsh J via devaraj) + MAPREDUCE-6354. ShuffleHandler should be able to log shuffle connections + (Chang Li via jlowe) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java index 6e069f19263..eedf42b3a7d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java @@ -136,7 +136,8 @@ import com.google.protobuf.ByteString; public class ShuffleHandler extends AuxiliaryService { private static final Log LOG = LogFactory.getLog(ShuffleHandler.class); - + private static final Log AUDITLOG = + LogFactory.getLog(ShuffleHandler.class.getName()+".audit"); public static final String SHUFFLE_MANAGE_OS_CACHE = "mapreduce.shuffle.manage.os.cache"; public static final boolean DEFAULT_SHUFFLE_MANAGE_OS_CACHE = true; @@ -751,6 +752,14 @@ public class ShuffleHandler extends AuxiliaryService { sendError(ctx, "Too many job/reduce parameters", BAD_REQUEST); return; } + + // This audit log is disabled by default. To turn it on, + // uncomment the ShuffleHandler.audit logger setting + // in log4j.properties. + if (AUDITLOG.isDebugEnabled()) { + AUDITLOG.debug("shuffle for " + jobQ.get(0) + + " reducer " + reduceQ.get(0)); + } int reduceId; String jobId; try { @@ -897,7 +906,9 @@ public class ShuffleHandler extends AuxiliaryService { protected void setResponseHeaders(HttpResponse response, boolean keepAliveParam, long contentLength) { if (!connectionKeepAliveEnabled && !keepAliveParam) { - LOG.info("Setting connection close header..."); + if (LOG.isDebugEnabled()) { + LOG.debug("Setting 
connection close header..."); + } response.setHeader(HttpHeaders.CONNECTION, CONNECTION_CLOSE); } else { response.setHeader(HttpHeaders.CONTENT_LENGTH,