HBASE-8646 Intermittent TestIOFencing#testFencingAroundCompaction failure due to region getting stuck in compaction

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1499049 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Enis Soztutar 2013-07-02 18:35:13 +00:00
parent 8c3a7e4a7d
commit 85ed20874e
2 changed files with 12 additions and 11 deletions

View File

@ -18,6 +18,11 @@
package org.apache.hadoop.hbase.util; package org.apache.hadoop.hbase.util;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.Method;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
@ -28,11 +33,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException; import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.Method;
/** /**
* Implementation for hdfs * Implementation for hdfs
@ -77,7 +77,7 @@ public class FSHDFSUtils extends FSUtils {
* (configurable) and then try again. * (configurable) and then try again.
* 6. If it returns true, break. * 6. If it returns true, break.
* 7. If it returns false, repeat starting at step 5. above. * 7. If it returns false, repeat starting at step 5. above.
* *
* If HDFS-4525 is available, call it every second and we might be able to exit early. * If HDFS-4525 is available, call it every second and we might be able to exit early.
*/ */
boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p, boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p,
@ -89,8 +89,8 @@ public class FSHDFSUtils extends FSUtils {
// usually needs 10 minutes before marking the nodes as dead. So we're putting ourselves // usually needs 10 minutes before marking the nodes as dead. So we're putting ourselves
// beyond that limit 'to be safe'. // beyond that limit 'to be safe'.
long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 900000) + startWaiting; long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 900000) + startWaiting;
// This setting should be what the cluster dfs heartbeat is set to. // This setting should be a little bit above what the cluster dfs heartbeat is set to.
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 3000); long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
// This should be set to how long it'll take for us to timeout against primary datanode if it // This should be set to how long it'll take for us to timeout against primary datanode if it
// is dead. We set it to 61 seconds, 1 second more than the default READ_TIMEOUT in HDFS, the // is dead. We set it to 61 seconds, 1 second more than the default READ_TIMEOUT in HDFS, the
// default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.

View File

@ -162,6 +162,7 @@ public class TestIOFencing {
HTableDescriptor htd, RegionServerServices rsServices) { HTableDescriptor htd, RegionServerServices rsServices) {
super(tableDir, log, fs, confParam, info, htd, rsServices); super(tableDir, log, fs, confParam, info, htd, rsServices);
} }
@Override
protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException { protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException {
return new BlockCompactionsInCompletionHStore(this, family, this.conf); return new BlockCompactionsInCompletionHStore(this, family, this.conf);
} }
@ -263,10 +264,10 @@ public class TestIOFencing {
long startWaitTime = System.currentTimeMillis(); long startWaitTime = System.currentTimeMillis();
while (newRegion == null) { while (newRegion == null) {
LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME)); LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));
Thread.sleep(100); Thread.sleep(1000);
newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME); newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME);
assertTrue("Timed out waiting for new server to open region", assertTrue("Timed out waiting for new server to open region",
System.currentTimeMillis() - startWaitTime < 60000); System.currentTimeMillis() - startWaitTime < 300000);
} }
LOG.info("Allowing compaction to proceed"); LOG.info("Allowing compaction to proceed");
compactingRegion.allowCompactions(); compactingRegion.allowCompactions();
@ -289,7 +290,7 @@ public class TestIOFencing {
startWaitTime = System.currentTimeMillis(); startWaitTime = System.currentTimeMillis();
while (newRegion.compactCount == 0) { while (newRegion.compactCount == 0) {
Thread.sleep(1000); Thread.sleep(1000);
assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 30000); assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 180000);
} }
assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table)); assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table));
} finally { } finally {