HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails sporadically. Contributed by Chris Nauroth.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1469839 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kihwal Lee 2013-04-19 14:08:29 +00:00
parent 44bf8525a5
commit 16cc4a6e86
3 changed files with 15 additions and 5 deletions

View File

@ -2560,6 +2560,9 @@ Release 0.23.8 - UNRELEASED
HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal)
+ HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails
+ sporadically (Chris Nauroth via kihwal)
Release 0.23.7 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -1286,7 +1286,10 @@ public class DataNode extends Configured
LOG.warn("checkDiskError: exception: ", e);
if (e instanceof SocketException || e instanceof SocketTimeoutException
|| e instanceof ClosedByInterruptException
- || e.getMessage().startsWith("Broken pipe")) {
+ || e.getMessage().startsWith("An established connection was aborted")
+ || e.getMessage().startsWith("Broken pipe")
+ || e.getMessage().startsWith("Connection reset")
+ || e.getMessage().contains("java.nio.channels.SocketChannel")) {
LOG.info("Not checking disk as checkDiskError was called on a network" +
" related exception");
return;

View File

@ -422,6 +422,11 @@ public class TestPipelinesFailover {
// Disable permissions so that another user can recover the lease.
harness.conf.setBoolean(
DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
+ // This test triggers rapid NN failovers. The client retry policy uses an
+ // exponential backoff. This can quickly lead to long sleep times and even
+ // timeout the whole test. Cap the sleep time at 1s to prevent this.
+ harness.conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
+ 1000);
final MiniDFSCluster cluster = harness.startCluster();
try {
@ -537,11 +542,10 @@ public class TestPipelinesFailover {
}
/**
- * Try to cover the lease on the given file for up to 30
- * seconds.
+ * Try to recover the lease on the given file for up to 60 seconds.
* @param fsOtherUser the filesystem to use for the recoverLease call
* @param testPath the path on which to run lease recovery
- * @throws TimeoutException if lease recover does not succeed within 30
+ * @throws TimeoutException if lease recover does not succeed within 60
* seconds
* @throws InterruptedException if the thread is interrupted
*/
@ -564,7 +568,7 @@ public class TestPipelinesFailover {
}
return success;
}
- }, 1000, 30000);
+ }, 1000, 60000);
} catch (TimeoutException e) {
throw new TimeoutException("Timed out recovering lease for " +
testPath);