MAPREDUCE-6680. JHS UserLogDir scan algorithm sometimes could skip directory with update in CloudFS (Azure FileSystem, S3, etc.). Contributed by Junping Du
(cherry picked from commit 1e48eefe58)
This commit is contained in:
parent
7fb1cfc591
commit
0b008dc21c
|
@ -306,10 +306,21 @@ public class HistoryFileManager extends AbstractService {
|
|||
*/
|
||||
private class UserLogDir {
|
||||
long modTime = 0;
|
||||
private long scanTime = 0;
|
||||
|
||||
public synchronized void scanIfNeeded(FileStatus fs) {
|
||||
long newModTime = fs.getModificationTime();
|
||||
if (modTime != newModTime) {
|
||||
// MAPREDUCE-6680: In some Cloud FileSystem, like Azure FS or S3, file's
|
||||
// modification time is truncated into seconds. In that case,
|
||||
// modTime == newModTime doesn't mean no file update in the directory,
|
||||
// so we need to have additional check.
|
||||
// Note: modTime (X second Y millisecond) could be cast to X second or
|
||||
// X+1 second.
|
||||
if (modTime != newModTime
|
||||
|| (scanTime/1000) == (modTime/1000)
|
||||
|| (scanTime/1000 + 1) == (modTime/1000)) {
|
||||
// reset scanTime before scanning happens
|
||||
scanTime = System.currentTimeMillis();
|
||||
Path p = fs.getPath();
|
||||
try {
|
||||
scanIntermediateDirectory(p);
|
||||
|
@ -323,6 +334,8 @@ public class HistoryFileManager extends AbstractService {
|
|||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Scan not needed of " + fs.getPath());
|
||||
}
|
||||
// reset scanTime
|
||||
scanTime = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue