MAPREDUCE-6680. JHS UserLogDir scan algorithm could sometimes skip directories with updates in CloudFS (Azure FileSystem, S3, etc.). Contributed by Junping Du.

(cherry picked from commit 1e48eefe58)
This commit is contained in:
Jian He 2016-04-20 19:02:10 -07:00
parent 7fb1cfc591
commit 0b008dc21c
1 changed files with 16 additions and 3 deletions

View File

@ -306,10 +306,21 @@ public class HistoryFileManager extends AbstractService {
*/
private class UserLogDir {
long modTime = 0;
private long scanTime = 0;
public synchronized void scanIfNeeded(FileStatus fs) {
long newModTime = fs.getModificationTime();
if (modTime != newModTime) {
// MAPREDUCE-6680: In some cloud file systems, like Azure FS or S3, a file's
// modification time is truncated to seconds. In that case,
// modTime == newModTime doesn't mean there was no file update in the
// directory, so we need an additional check.
// Note: modTime (X seconds Y milliseconds) could be cast to X seconds or
// X+1 seconds.
if (modTime != newModTime
|| (scanTime/1000) == (modTime/1000)
|| (scanTime/1000 + 1) == (modTime/1000)) {
// reset scanTime before scanning happens
scanTime = System.currentTimeMillis();
Path p = fs.getPath();
try {
scanIntermediateDirectory(p);
@ -323,6 +334,8 @@ public class HistoryFileManager extends AbstractService {
if (LOG.isDebugEnabled()) {
LOG.debug("Scan not needed of " + fs.getPath());
}
// reset scanTime
scanTime = System.currentTimeMillis();
}
}
}