From 3946c10b132fcbce1012f935a20bb9d59be2f823 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 31 Jan 2013 22:51:49 +0000 Subject: [PATCH] svn merge -c 1441239 FIXES: YARN-364. AggregatedLogDeletionService can take too long to delete logs. Contributed by Jason Lowe git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1441244 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/site/apt/ClusterSetup.apt.vm | 7 ++ hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/conf/YarnConfiguration.java | 9 +++ .../AggregatedLogDeletionService.java | 9 ++- .../src/main/resources/yarn-default.xml | 9 +++ .../TestAggregatedLogDeletionService.java | 71 +++++++++++++++++++ 6 files changed, 107 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm index 5f28d7c3072..da69b303b37 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm @@ -325,6 +325,13 @@ Hadoop MapReduce Next Generation - Cluster Setup | | | How long to keep aggregation logs before deleting them. -1 disables. | | | | Be careful, set this too small and you will spam the name node. | *-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | <-1> | | +| | | Time between checks for aggregated log retention. If set to 0 or a | +| | | negative value then the value is computed as one-tenth of the | +| | | aggregated log retention time. | +| | | Be careful, set this too small and you will spam the name node. | +*-------------------------+-------------------------+------------------------+ diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 30fb46ae831..7d4706177c9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -263,6 +263,9 @@ Release 0.23.7 - UNRELEASED YARN-343. Capacity Scheduler maximum-capacity value -1 is invalid (Xuan Gong via tgraves) + YARN-364. AggregatedLogDeletionService can take too long to delete logs + (jlowe) + Release 0.23.6 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6b63ad99f01..d0cd3132995 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -379,6 +379,15 @@ public class YarnConfiguration extends Configuration { + "log-aggregation.retain-seconds"; public static final long DEFAULT_LOG_AGGREGATION_RETAIN_SECONDS = -1; + /** + * How long to wait between aggregated log retention checks. If set to + * a value <= 0 then the value is computed as one-tenth of the log retention + * setting. Be careful set this too small and you will spam the name node. + */ + public static final String LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS = + YARN_PREFIX + "log-aggregation.retain-check-interval-seconds"; + public static final long DEFAULT_LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS = -1; + /** * Number of seconds to retain logs on the NodeManager. Only applicable if Log * aggregation is disabled diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogDeletionService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogDeletionService.java index 9fbcae9989d..c8603ab7c10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogDeletionService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogDeletionService.java @@ -140,9 +140,16 @@ public class AggregatedLogDeletionService extends AbstractService { " too small (" + retentionSecs + ")"); return; } + long checkIntervalMsecs = 1000 * conf.getLong( + YarnConfiguration.LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS, + YarnConfiguration.DEFAULT_LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS); + if (checkIntervalMsecs <= 0) { + // when unspecified compute check interval as 1/10th of retention + checkIntervalMsecs = (retentionSecs * 1000) / 10; + } TimerTask task = new LogDeletionTask(conf, retentionSecs); timer = new Timer(); - timer.scheduleAtFixedRate(task, 0, retentionSecs * 1000); + timer.scheduleAtFixedRate(task, 0, checkIntervalMsecs); super.start(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index e5f05671d2d..e8c28a41b3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -410,6 +410,15 @@ -1 + + How long to wait between aggregated log retention checks. + If set to 0 or a negative value then the value is computed as one-tenth + of the aggregated log retention time. Be careful set this too small and + you will spam the name node. + yarn.log-aggregation.retain-check-interval-seconds + -1 + + Time in seconds to retain user logs. Only applicable if log aggregation is disabled diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogDeletionService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogDeletionService.java index c1cf9af3602..035cd9515c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogDeletionService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogDeletionService.java @@ -28,12 +28,19 @@ import org.apache.hadoop.fs.FilterFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.Before; import org.junit.Test; import static org.mockito.Mockito.*; public class TestAggregatedLogDeletionService { + @Before + public void closeFilesystems() throws IOException { + // prevent the same mockfs instance from being reused due to FS cache + FileSystem.closeAll(); + } + @Test public void testDeletion() throws Exception { long now = System.currentTimeMillis(); @@ -121,6 +128,70 @@ public class TestAggregatedLogDeletionService { verify(mockFs).delete(app4Dir, true); } + @Test + public void testCheckInterval() throws Exception { + long RETENTION_SECS = 10 * 24 * 3600; + long now = System.currentTimeMillis(); + long toDeleteTime = now - RETENTION_SECS*1000; + + String root = "mockfs://foo/"; + String remoteRootLogDir = root+"tmp/logs"; + String suffix = "logs"; + Configuration conf = new Configuration(); + conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class); + conf.set(YarnConfiguration.LOG_AGGREGATION_ENABLED, "true"); + conf.set(YarnConfiguration.LOG_AGGREGATION_RETAIN_SECONDS, "864000"); + conf.set(YarnConfiguration.LOG_AGGREGATION_RETAIN_CHECK_INTERVAL_SECONDS, "1"); + conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteRootLogDir); + conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR_SUFFIX, suffix); + + // prevent us from picking up the same mockfs instance from another test + FileSystem.closeAll(); + Path rootPath = new Path(root); + FileSystem rootFs = rootPath.getFileSystem(conf); + FileSystem mockFs = ((FilterFileSystem)rootFs).getRawFileSystem(); + + Path remoteRootLogPath = new Path(remoteRootLogDir); + + Path userDir = new Path(remoteRootLogPath, "me"); + FileStatus userDirStatus = new FileStatus(0, true, 0, 0, now, userDir); + + when(mockFs.listStatus(remoteRootLogPath)).thenReturn( + new FileStatus[]{userDirStatus}); + + Path userLogDir = new Path(userDir, suffix); + Path app1Dir = new Path(userLogDir, "application_1_1"); + FileStatus app1DirStatus = new FileStatus(0, true, 0, 0, now, app1Dir); + + when(mockFs.listStatus(userLogDir)).thenReturn( + new FileStatus[]{app1DirStatus}); + + Path app1Log1 = new Path(app1Dir, "host1"); + FileStatus app1Log1Status = new FileStatus(10, false, 1, 1, now, app1Log1); + + when(mockFs.listStatus(app1Dir)).thenReturn( + new FileStatus[]{app1Log1Status}); + + AggregatedLogDeletionService deletionSvc = + new AggregatedLogDeletionService(); + deletionSvc.init(conf); + deletionSvc.start(); + + verify(mockFs, timeout(10000).atLeast(4)).listStatus(any(Path.class)); + verify(mockFs, never()).delete(app1Dir, true); + + // modify the timestamp of the logs and verify it's picked up quickly + app1DirStatus = new FileStatus(0, true, 0, 0, toDeleteTime, app1Dir); + app1Log1Status = new FileStatus(10, false, 1, 1, toDeleteTime, app1Log1); + when(mockFs.listStatus(userLogDir)).thenReturn( + new FileStatus[]{app1DirStatus}); + when(mockFs.listStatus(app1Dir)).thenReturn( + new FileStatus[]{app1Log1Status}); + + verify(mockFs, timeout(10000)).delete(app1Dir, true); + + deletionSvc.stop(); + } static class MockFileSystem extends FilterFileSystem { MockFileSystem() {