MAPREDUCE-6680. JHS UserLogDir scan algorithm could sometimes skip a directory with updates in CloudFS (Azure FileSystem, S3, etc.). Contributed by Junping Du

(cherry picked from commit 1e48eefe58)
This commit is contained in:
Jian He 2016-04-20 19:02:10 -07:00
parent 83a5cdc400
commit 70a31458e0
1 changed files with 16 additions and 3 deletions

View File

@ -307,10 +307,21 @@ public class HistoryFileManager extends AbstractService {
*/
private class UserLogDir {
long modTime = 0;
private long scanTime = 0;
public synchronized void scanIfNeeded(FileStatus fs) {
long newModTime = fs.getModificationTime();
if (modTime != newModTime) {
// MAPREDUCE-6680: In some cloud file systems, like Azure FS or S3, a file's
// modification time is truncated to seconds. In that case,
// modTime == newModTime doesn't mean there was no file update in the
// directory, so we need an additional check.
// Note: modTime (X seconds Y milliseconds) could be cast to X seconds or
// X+1 seconds.
if (modTime != newModTime
|| (scanTime/1000) == (modTime/1000)
|| (scanTime/1000 + 1) == (modTime/1000)) {
// reset scanTime before scanning happens
scanTime = System.currentTimeMillis();
Path p = fs.getPath();
try {
scanIntermediateDirectory(p);
@ -324,10 +335,12 @@ public class HistoryFileManager extends AbstractService {
if (LOG.isDebugEnabled()) {
LOG.debug("Scan not needed of " + fs.getPath());
}
// reset scanTime
scanTime = System.currentTimeMillis();
}
}
}
public class HistoryFileInfo {
private Path historyFile;
private Path confFile;