diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 80076808a15..659e63b44bb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -18,6 +18,11 @@
 
 package org.apache.hadoop.hbase.util;
 
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.lang.reflect.Method;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -28,11 +33,6 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.lang.reflect.Method;
-
 /**
  * Implementation for hdfs
@@ -77,7 +77,7 @@ public class FSHDFSUtils extends FSUtils {
    * (configurable) and then try again.
    * 6. If it returns true, break.
    * 7. If it returns false, repeat starting at step 5. above.
-   * 
+   *
    * If HDFS-4525 is available, call it every second and we might be able to exit early.
    */
   boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p,
@@ -89,8 +89,8 @@ public class FSHDFSUtils extends FSUtils {
     // usually needs 10 minutes before marking the nodes as dead. So we're putting ourselves
     // beyond that limit 'to be safe'.
     long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 900000) + startWaiting;
-    // This setting should be what the cluster dfs heartbeat is set to.
-    long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 3000);
+    // This setting should be a little bit above what the cluster dfs heartbeat is set to.
+    long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
     // This should be set to how long it'll take for us to timeout against primary datanode if it
     // is dead. We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
     // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java
index b47ab58427e..3c189f2e337 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java
@@ -162,6 +162,7 @@
         HTableDescriptor htd, RegionServerServices rsServices) {
       super(tableDir, log, fs, confParam, info, htd, rsServices);
     }
+    @Override
     protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException {
       return new BlockCompactionsInCompletionHStore(this, family, this.conf);
     }
@@ -263,10 +264,10 @@ public class TestIOFencing {
       long startWaitTime = System.currentTimeMillis();
       while (newRegion == null) {
         LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));
-        Thread.sleep(100);
+        Thread.sleep(1000);
         newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME);
         assertTrue("Timed out waiting for new server to open region",
-          System.currentTimeMillis() - startWaitTime < 60000);
+          System.currentTimeMillis() - startWaitTime < 300000);
       }
       LOG.info("Allowing compaction to proceed");
       compactingRegion.allowCompactions();
@@ -289,7 +290,7 @@ public class TestIOFencing {
       startWaitTime = System.currentTimeMillis();
       while (newRegion.compactCount == 0) {
         Thread.sleep(1000);
-        assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 30000);
+        assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 180000);
       }
       assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table));
     } finally {