MAPREDUCE-7101. Add config parameter to allow JHS to always scan user dir irrespective of modTime. (Thomas Marquardt via wangda)

Change-Id: I8d8e71c44469fb48f1f5108620843e35e0100411
This commit is contained in:
Wangda Tan 2018-06-15 13:18:17 -07:00
parent 271ea7a3d9
commit 332139e928
3 changed files with 23 additions and 3 deletions

View File

@ -61,8 +61,13 @@ public class JHAdminConfig {
MR_HISTORY_PREFIX + "cleaner.interval-ms";
public static final long DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS =
1 * 24 * 60 * 60 * 1000l; //1 day
/**
 * Configuration key for a boolean flag: when set to true, the user dir is
 * always scanned, irrespective of the dir's modification time.
 */
public static final String MR_HISTORY_ALWAYS_SCAN_USER_DIR =
MR_HISTORY_PREFIX + "always-scan-user-dir";
/**
 * Default for {@link #MR_HISTORY_ALWAYS_SCAN_USER_DIR}: always-scan is
 * disabled.
 */
public static final boolean DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR =
false;
/** The number of threads to handle client API requests.*/
public static final String MR_HISTORY_CLIENT_THREAD_COUNT =
MR_HISTORY_PREFIX + "client.thread-count";

View File

@ -1729,6 +1729,15 @@
</description>
</property>
<property>
<name>mapreduce.jobhistory.always-scan-user-dir</name>
<value>false</value>
<description>Some Cloud FileSystems do not currently update the
modification time of directories. To support these filesystems, this
configuration value should be set to 'true'.
</description>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>

View File

@ -324,7 +324,13 @@ public class HistoryFileManager extends AbstractService {
// so we need to have additional check.
// Note: modTime (X second Y millisecond) could be casted to X second or
// X+1 second.
if (modTime != newModTime
// MAPREDUCE-7101: Some Cloud FileSystems do not currently update the
// modification time of directories. For these, we scan every time if
// the 'alwaysScan' is true.
boolean alwaysScan = conf.getBoolean(
JHAdminConfig.MR_HISTORY_ALWAYS_SCAN_USER_DIR,
JHAdminConfig.DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR);
if (alwaysScan || modTime != newModTime
|| (scanTime/1000) == (modTime/1000)
|| (scanTime/1000 + 1) == (modTime/1000)) {
// reset scanTime before scanning happens