MAPREDUCE-7101. Add config parameter to allow JHS to always scan user dir irrespective of modTime. (Thomas Marquardt via asuresh)

(cherry picked from commit 5670e89b2e)
This commit is contained in:
Arun Suresh 2018-06-12 15:36:52 -07:00
parent b3f4c27661
commit 358fe4c61a
3 changed files with 23 additions and 3 deletions

View File

@ -62,6 +62,11 @@ public class JHAdminConfig {
public static final long DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS = public static final long DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS =
1 * 24 * 60 * 60 * 1000l; //1 day 1 * 24 * 60 * 60 * 1000l; //1 day
/** Always scan user dir, irrespective of dir modification time.*/
public static final String MR_HISTORY_ALWAYS_SCAN_USER_DIR =
MR_HISTORY_PREFIX + "always-scan-user-dir";
public static final boolean DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR =
false;
/** The number of threads to handle client API requests.*/ /** The number of threads to handle client API requests.*/
public static final String MR_HISTORY_CLIENT_THREAD_COUNT = public static final String MR_HISTORY_CLIENT_THREAD_COUNT =

View File

@ -1718,6 +1718,15 @@
<description></description> <description></description>
</property> </property>
<property>
<name>mapreduce.jobhistory.always-scan-user-dir</name>
<value>false</value>
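<description>Some Cloud FileSystems do not currently update the
modification time of directories. To support these filesystems, this
configuration value should be set to 'true', which makes the job history
server always scan the user directory, irrespective of the directory's
modification time.
</description>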
</property>
<property> <property>
<name>mapreduce.jobhistory.done-dir</name> <name>mapreduce.jobhistory.done-dir</name>
<value>${yarn.app.mapreduce.am.staging-dir}/history/done</value> <value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>

View File

@ -322,7 +322,13 @@ public class HistoryFileManager extends AbstractService {
// so we need to have additional check. // so we need to have additional check.
// Note: modTime (X second Y millisecond) could be cast to X second or // Note: modTime (X second Y millisecond) could be cast to X second or
// X+1 second. // X+1 second.
if (modTime != newModTime // MAPREDUCE-7101: Some Cloud FileSystems do not currently update the
// modification time of directories. For these, we scan every time when
// 'alwaysScan' is true.
boolean alwaysScan = conf.getBoolean(
JHAdminConfig.MR_HISTORY_ALWAYS_SCAN_USER_DIR,
JHAdminConfig.DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR);
if (alwaysScan || modTime != newModTime
|| (scanTime/1000) == (modTime/1000) || (scanTime/1000) == (modTime/1000)
|| (scanTime/1000 + 1) == (modTime/1000)) { || (scanTime/1000 + 1) == (modTime/1000)) {
// reset scanTime before scanning happens // reset scanTime before scanning happens