HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)

This commit is contained in:
Liu Shaohui 2015-03-19 10:07:25 +08:00
parent f9a17edc25
commit 6709feeb65
1 changed files with 8 additions and 6 deletions

View File

@ -180,8 +180,10 @@ public class FSHDFSUtils extends FSUtils {
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000); long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
// This should be set to how long it'll take for us to timeout against primary datanode if it // This should be set to how long it'll take for us to timeout against primary datanode if it
// is dead. We set it to 61 seconds, 1 second more than the default READ_TIMEOUT in HDFS, the // is dead. We set it to 61 seconds, 1 second more than the default READ_TIMEOUT in HDFS, the
// default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000); // timeout, then further recovery will take linear backoff with this base, to avoid endless
// preemptions when this value is not properly configured.
long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
Method isFileClosedMeth = null; Method isFileClosedMeth = null;
// whether we need to look for isFileClosed method // whether we need to look for isFileClosed method
@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
if (nbAttempt == 0) { if (nbAttempt == 0) {
Thread.sleep(firstPause); Thread.sleep(firstPause);
} else { } else {
// Cycle here until subsequentPause elapses. While spinning, check isFileClosed if // Cycle here until (subsequentPause * nbAttempt) elapses. While spinning, check
// available (should be in hadoop 2.0.5... not in hadoop 1 though. // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though.
long localStartWaiting = EnvironmentEdgeManager.currentTime(); long localStartWaiting = EnvironmentEdgeManager.currentTime();
while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) < while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
subsequentPause) { subsequentPauseBase * nbAttempt) {
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000)); Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
if (findIsFileClosedMeth) { if (findIsFileClosedMeth) {
try { try {