From 9c5f7f290eb05808fe89835a15bee0947b91d1a0 Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Wed, 8 Jun 2016 13:44:22 -0700 Subject: [PATCH] HDFS-10220. A large number of expired leases can make namenode unresponsive and cause failover (Nicolas Fraison via raviprak) (cherry picked from commit ae047655f4355288406cd5396fb4e3ea7c307b14) --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 10 +++++ .../server/common/HdfsServerConstants.java | 1 - .../hdfs/server/namenode/FSNamesystem.java | 42 +++++++++++++++---- .../hdfs/server/namenode/LeaseManager.java | 21 ++++++++-- .../src/main/resources/hdfs-default.xml | 18 ++++++++ .../server/namenode/TestLeaseManager.java | 24 ++++++----- 6 files changed, 94 insertions(+), 22 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index abacfaf3f12..f3a4dcba0df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -374,6 +374,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT = 16384; public static final int DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT = 32768; + public static final String DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY = + "dfs.namenode.lease-recheck-interval-ms"; + public static final long DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT = + 2000; + public static final String + DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY = + "dfs.namenode.max-lock-hold-to-release-lease-ms"; + public static final long + DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25; + public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor"; public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index 2beb540c8b1..0b925f0d155 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -354,7 +354,6 @@ public interface HdfsServerConstants { } String NAMENODE_LEASE_HOLDER = "HDFS_NameNode"; - long NAMENODE_LEASE_RECHECK_INTERVAL = 2000; String CRYPTO_XATTR_ENCRYPTION_ZONE = "raw.hdfs.crypto.encryption.zone"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 0147b48f9f8..bdf15bce95a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -76,6 +76,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPI import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; @@ -372,7 +376,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, private final UserGroupInformation fsOwner; private final String supergroup; private final boolean standbyShouldCheckpoint; - + + /** Interval between each check of lease to release. */ + private final long leaseRecheckIntervalMs; + /** Maximum time the lock is hold to release lease. */ + private final long maxLockHoldToReleaseLeaseMs; + // Scan interval is not configurable. private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL = TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS); @@ -791,6 +800,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY, DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT); + this.leaseRecheckIntervalMs = conf.getLong( + DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY, + DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT); + this.maxLockHoldToReleaseLeaseMs = conf.getLong( + DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY, + DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT); + // For testing purposes, allow the DT secret manager to be started regardless // of whether security is enabled. alwaysUseDelegationTokensForTests = conf.getBoolean( @@ -834,6 +850,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return retryCache; } + @VisibleForTesting + public long getLeaseRecheckIntervalMs() { + return leaseRecheckIntervalMs; + } + + @VisibleForTesting + public long getMaxLockHoldToReleaseLeaseMs() { + return maxLockHoldToReleaseLeaseMs; + } + void lockRetryCache() { if (retryCache != null) { retryCache.lock(); @@ -3083,9 +3109,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, if(nrCompleteBlocks == nrBlocks) { finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false); - NameNode.stateChangeLog.warn("BLOCK*" - + " internalReleaseLease: All existing blocks are COMPLETE," - + " lease removed, file closed."); + NameNode.stateChangeLog.warn("BLOCK*" + + " internalReleaseLease: All existing blocks are COMPLETE," + + " lease removed, file " + src + " closed."); return true; // closed! } @@ -3122,9 +3148,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, blockManager.checkMinReplication(lastBlock)) { finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false); - NameNode.stateChangeLog.warn("BLOCK*" - + " internalReleaseLease: Committed blocks are minimally replicated," - + " lease removed, file closed."); + NameNode.stateChangeLog.warn("BLOCK*" + + " internalReleaseLease: Committed blocks are minimally" + + " replicated, lease removed, file" + src + " closed."); return true; // closed! } // Cannot close file right now, since some blocks @@ -3167,7 +3193,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false); NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: " - + "Removed empty last block and closed file."); + + "Removed empty last block and closed file " + src); return true; } // start recovery of the last block for this file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 6bc9e349f4c..69d161948b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -336,7 +336,7 @@ public class LeaseManager { } } - Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL); + Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs()); } catch(InterruptedException ie) { if (LOG.isDebugEnabled()) { LOG.debug(name + " is interrupted", ie); @@ -356,8 +356,11 @@ public class LeaseManager { boolean needSync = false; assert fsnamesystem.hasWriteLock(); - while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()) { - Lease leaseToCheck = sortedLeases.poll(); + long start = monotonicNow(); + + while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit() + && !isMaxLockHoldToReleaseLease(start)) { + Lease leaseToCheck = sortedLeases.peek(); LOG.info(leaseToCheck + " has expired hard limit"); final List removing = new ArrayList<>(); @@ -397,6 +400,11 @@ public class LeaseManager { + leaseToCheck, e); removing.add(id); } + if (isMaxLockHoldToReleaseLease(start)) { + LOG.debug("Breaking out of checkLeases after " + + fsnamesystem.getMaxLockHoldToReleaseLeaseMs() + "ms."); + break; + } } for(Long id : removing) { @@ -407,6 +415,13 @@ public class LeaseManager { return needSync; } + + /** @return true if max lock hold is reached */ + private boolean isMaxLockHoldToReleaseLease(long start) { + return monotonicNow() - start > + fsnamesystem.getMaxLockHoldToReleaseLeaseMs(); + } + @Override public synchronized String toString() { return getClass().getSimpleName() + "= {" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index c229abd70cf..7a2128bcd26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2601,6 +2601,24 @@ + + dfs.namenode.lease-recheck-interval-ms + 2000 + During the release of lease a lock is hold that make any + operations on the namenode stuck. In order to not block them during + a too long duration we stop releasing lease after this max lock limit. + + + + + dfs.namenode.max-lock-hold-to-release-lease-ms + 25 + During the release of lease a lock is hold that make any + operations on the namenode stuck. In order to not block them during + a too long duration we stop releasing lease after this max lock limit. + + + dfs.namenode.startup.delay.block.deletion.sec 0 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java index 3bb7bb71bc9..f82374596f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -39,6 +40,8 @@ public class TestLeaseManager { @Rule public Timeout timeout = new Timeout(300000); + public static long maxLockHoldToReleaseLeaseMs = 100; + @Test public void testRemoveLeases() throws Exception { FSNamesystem fsn = mock(FSNamesystem.class); @@ -57,28 +60,28 @@ public class TestLeaseManager { assertEquals(0, lm.getINodeIdWithLeases().size()); } - /** Check that even if LeaseManager.checkLease is not able to relinquish - * leases, the Namenode does't enter an infinite loop while holding the FSN - * write lock and thus become unresponsive + /** Check that LeaseManager.checkLease release some leases */ @Test - public void testCheckLeaseNotInfiniteLoop() { + public void testCheckLease() { LeaseManager lm = new LeaseManager(makeMockFsNameSystem()); + long numLease = 100; + //Make sure the leases we are going to add exceed the hard limit lm.setLeasePeriod(0, 0); - //Add some leases to the LeaseManager - lm.addLease("holder1", INodeId.ROOT_INODE_ID + 1); - lm.addLease("holder2", INodeId.ROOT_INODE_ID + 2); - lm.addLease("holder3", INodeId.ROOT_INODE_ID + 3); - assertEquals(lm.countLease(), 3); + for (long i = 0; i <= numLease - 1; i++) { + //Add some leases to the LeaseManager + lm.addLease("holder"+i, INodeId.ROOT_INODE_ID + i); + } + assertEquals(numLease, lm.countLease()); //Initiate a call to checkLease. This should exit within the test timeout lm.checkLeases(); + assertTrue(lm.countLease() < numLease); } - @Test public void testCountPath() { LeaseManager lm = new LeaseManager(makeMockFsNameSystem()); @@ -112,6 +115,7 @@ public class TestLeaseManager { when(fsn.isRunning()).thenReturn(true); when(fsn.hasWriteLock()).thenReturn(true); when(fsn.getFSDirectory()).thenReturn(dir); + when(fsn.getMaxLockHoldToReleaseLeaseMs()).thenReturn(maxLockHoldToReleaseLeaseMs); return fsn; }