From fbce046a8816bfeef2a07687e73ccd6d8282d934 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Fri, 19 Apr 2013 14:11:28 +0000 Subject: [PATCH] svn merge -c 1469839 Merging from trunk to branch-2 to fix HDFS-4699. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1469840 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 6 ++++++ .../apache/hadoop/hdfs/server/datanode/DataNode.java | 5 ++++- .../hdfs/server/namenode/NameNodeRpcServer.java | 5 ++++- .../server/namenode/ha/TestPipelinesFailover.java | 12 ++++++++---- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0100d1837c0..a41c1b95df6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -50,6 +50,9 @@ Release 2.0.5-beta - UNRELEASED HDFS-4693. Some test cases in TestCheckpoint do not clean up after themselves. (Arpit Agarwal, suresh via suresh) + HDFS-3817. Avoid printing SafeModeException stack trace. + (Brandon Li via suresh) + OPTIMIZATIONS BUG FIXES @@ -2221,6 +2224,9 @@ Release 0.23.8 - UNRELEASED HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal) + HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails + sporadically (Chris Nauroth via kihwal) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 104bfcd3567..8d52581802d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1177,7 +1177,10 @@ public class DataNode extends Configured LOG.warn("checkDiskError: exception: ", e); if (e instanceof SocketException || e instanceof SocketTimeoutException || e instanceof ClosedByInterruptException - || e.getMessage().startsWith("Broken pipe")) { + || e.getMessage().startsWith("An established connection was aborted") + || e.getMessage().startsWith("Broken pipe") + || e.getMessage().startsWith("Connection reset") + || e.getMessage().contains("java.nio.channels.SocketChannel")) { LOG.info("Not checking disk as checkDiskError was called on a network" + " related exception"); return; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 9130590fee3..35f3416d225 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -273,7 +273,10 @@ class NameNodeRpcServer implements NamenodeProtocols { minimumDataNodeVersion = conf.get( DFSConfigKeys.DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_KEY, DFSConfigKeys.DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_DEFAULT); - } + + // Set terse exception whose stack trace won't be logged + this.clientRpcServer.addTerseExceptions(SafeModeException.class); + } /** * Start client and service RPC servers. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java index f7af9b03aed..bfac1afcaaa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java @@ -422,6 +422,11 @@ public class TestPipelinesFailover { // Disable permissions so that another user can recover the lease. harness.conf.setBoolean( DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + // This test triggers rapid NN failovers. The client retry policy uses an + // exponential backoff. This can quickly lead to long sleep times and even + // timeout the whole test. Cap the sleep time at 1s to prevent this. + harness.conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY, + 1000); final MiniDFSCluster cluster = harness.startCluster(); try { @@ -537,11 +542,10 @@ public class TestPipelinesFailover { } /** - * Try to cover the lease on the given file for up to 30 - * seconds. + * Try to recover the lease on the given file for up to 60 seconds. * @param fsOtherUser the filesystem to use for the recoverLease call * @param testPath the path on which to run lease recovery - * @throws TimeoutException if lease recover does not succeed within 30 + * @throws TimeoutException if lease recover does not succeed within 60 * seconds * @throws InterruptedException if the thread is interrupted */ @@ -564,7 +568,7 @@ public class TestPipelinesFailover { } return success; } - }, 1000, 30000); + }, 1000, 60000); } catch (TimeoutException e) { throw new TimeoutException("Timed out recovering lease for " + testPath);