HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails sporadically. Contributed by Chris Nauroth.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1469839 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kihwal Lee 2013-04-19 14:08:29 +00:00
parent 44bf8525a5
commit 16cc4a6e86
3 changed files with 15 additions and 5 deletions

View File

@ -2560,6 +2560,9 @@ Release 0.23.8 - UNRELEASED
HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal) HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal)
HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails
sporadically (Chris Nauroth via kihwal)
Release 0.23.7 - UNRELEASED Release 0.23.7 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -1286,7 +1286,10 @@ public class DataNode extends Configured
LOG.warn("checkDiskError: exception: ", e); LOG.warn("checkDiskError: exception: ", e);
if (e instanceof SocketException || e instanceof SocketTimeoutException if (e instanceof SocketException || e instanceof SocketTimeoutException
|| e instanceof ClosedByInterruptException || e instanceof ClosedByInterruptException
|| e.getMessage().startsWith("Broken pipe")) { || e.getMessage().startsWith("An established connection was aborted")
|| e.getMessage().startsWith("Broken pipe")
|| e.getMessage().startsWith("Connection reset")
|| e.getMessage().contains("java.nio.channels.SocketChannel")) {
LOG.info("Not checking disk as checkDiskError was called on a network" + LOG.info("Not checking disk as checkDiskError was called on a network" +
" related exception"); " related exception");
return; return;

View File

@ -422,6 +422,11 @@ public class TestPipelinesFailover {
// Disable permissions so that another user can recover the lease. // Disable permissions so that another user can recover the lease.
harness.conf.setBoolean( harness.conf.setBoolean(
DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
// This test triggers rapid NN failovers. The client retry policy uses an
// exponential backoff. This can quickly lead to long sleep times and even
// time out the whole test. Cap the sleep time at 1s to prevent this.
harness.conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
1000);
final MiniDFSCluster cluster = harness.startCluster(); final MiniDFSCluster cluster = harness.startCluster();
try { try {
@ -537,11 +542,10 @@ public class TestPipelinesFailover {
} }
/** /**
- * Try to cover the lease on the given file for up to 30
- * seconds.
+ * Try to recover the lease on the given file for up to 60 seconds.
* @param fsOtherUser the filesystem to use for the recoverLease call * @param fsOtherUser the filesystem to use for the recoverLease call
* @param testPath the path on which to run lease recovery * @param testPath the path on which to run lease recovery
- * @throws TimeoutException if lease recover does not succeed within 30
+ * @throws TimeoutException if lease recover does not succeed within 60
  * seconds
* @throws InterruptedException if the thread is interrupted * @throws InterruptedException if the thread is interrupted
*/ */
@ -564,7 +568,7 @@ public class TestPipelinesFailover {
} }
return success; return success;
} }
}, 1000, 30000); }, 1000, 60000);
} catch (TimeoutException e) { } catch (TimeoutException e) {
throw new TimeoutException("Timed out recovering lease for " + throw new TimeoutException("Timed out recovering lease for " +
testPath); testPath);