HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)
This commit is contained in:
parent
f9a17edc25
commit
6709feeb65
|
@ -180,8 +180,10 @@ public class FSHDFSUtils extends FSUtils {
|
||||||
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
|
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
|
||||||
// This should be set to how long it'll take for us to timeout against primary datanode if it
|
// This should be set to how long it'll take for us to timeout against primary datanode if it
|
||||||
// is dead. We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
|
// is dead. We set it to 61 seconds, 1 second more than the default READ_TIMEOUT in HDFS, the
|
||||||
// default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
|
// default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
|
||||||
long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
|
// timeout, then further recovery will take linear backoff with this base, to avoid endless
|
||||||
|
// preemptions when this value is not properly configured.
|
||||||
|
long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
|
||||||
|
|
||||||
Method isFileClosedMeth = null;
|
Method isFileClosedMeth = null;
|
||||||
// whether we need to look for isFileClosed method
|
// whether we need to look for isFileClosed method
|
||||||
|
@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
|
||||||
if (nbAttempt == 0) {
|
if (nbAttempt == 0) {
|
||||||
Thread.sleep(firstPause);
|
Thread.sleep(firstPause);
|
||||||
} else {
|
} else {
|
||||||
// Cycle here until subsequentPause elapses. While spinning, check isFileClosed if
|
// Cycle here until (subsequentPauseBase * nbAttempt) elapses. While spinning, check
|
||||||
// available (should be in hadoop 2.0.5... not in hadoop 1 though.
|
// isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though).
|
||||||
long localStartWaiting = EnvironmentEdgeManager.currentTime();
|
long localStartWaiting = EnvironmentEdgeManager.currentTime();
|
||||||
while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
|
while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
|
||||||
subsequentPause) {
|
subsequentPauseBase * nbAttempt) {
|
||||||
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
|
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
|
||||||
if (findIsFileClosedMeth) {
|
if (findIsFileClosedMeth) {
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Reference in New Issue