HDFS-10220. A large number of expired leases can make namenode unresponsive and cause failover (Nicolas Fraison via raviprak)

(cherry picked from commit ae047655f4)
Committed by Ravi Prakash on 2016-06-08 13:44:22 -07:00
parent 9319665461
commit 9c5f7f290e
6 changed files with 94 additions and 22 deletions
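
The change, in essence: instead of holding the FSNamesystem write lock until every expired lease has been released, the lease monitor now releases leases in time-bounded batches and sleeps between passes, so other namenode operations can proceed. Below is a condensed, self-contained sketch of that pattern only; the class, field, and method names are illustrative and are not the actual HDFS code, though the 2000 ms / 25 ms defaults mirror the values this patch adds.

// Sketch: drain a large expired-lease backlog in bounded batches (illustrative names).
import java.util.ArrayDeque;
import java.util.Queue;

public class BoundedReleaseSketch {
  private static final long RECHECK_INTERVAL_MS = 2000;  // analogous to lease-recheck-interval-ms
  private static final long MAX_LOCK_HOLD_MS = 25;       // analogous to max-lock-hold-to-release-lease-ms

  private final Queue<String> expiredLeases = new ArrayDeque<>();

  /** Release expired leases, but give the lock back once MAX_LOCK_HOLD_MS has elapsed. */
  synchronized boolean releaseBatch() {
    final long startNanos = System.nanoTime();
    int released = 0;
    while (!expiredLeases.isEmpty()) {
      expiredLeases.poll();                        // stand-in for releasing one lease
      released++;
      if ((System.nanoTime() - startNanos) / 1_000_000 > MAX_LOCK_HOLD_MS) {
        break;                                     // stop early; resume on the next pass
      }
    }
    System.out.println("released " + released + ", remaining " + expiredLeases.size());
    return !expiredLeases.isEmpty();               // true means more work is left
  }

  public static void main(String[] args) throws InterruptedException {
    BoundedReleaseSketch monitor = new BoundedReleaseSketch();
    for (int i = 0; i < 1_000_000; i++) {
      monitor.expiredLeases.add("lease-" + i);     // simulate a huge expired backlog
    }
    while (monitor.releaseBatch()) {
      Thread.sleep(RECHECK_INTERVAL_MS);           // the monitor thread sleeps between passes
    }
  }
}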

DFSConfigKeys.java

@@ -374,6 +374,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT = 16384;
   public static final int DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT = 32768;
+  public static final String DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY =
+      "dfs.namenode.lease-recheck-interval-ms";
+  public static final long DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT =
+      2000;
+  public static final String
+      DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY =
+      "dfs.namenode.max-lock-hold-to-release-lease-ms";
+  public static final long
+      DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
   public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
   public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;

HdfsServerConstants.java

@@ -354,7 +354,6 @@ public interface HdfsServerConstants {
   }
   String NAMENODE_LEASE_HOLDER = "HDFS_NameNode";
-  long NAMENODE_LEASE_RECHECK_INTERVAL = 2000;
   String CRYPTO_XATTR_ENCRYPTION_ZONE =
       "raw.hdfs.crypto.encryption.zone";

FSNamesystem.java

@@ -76,6 +76,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPI
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
@@ -372,7 +376,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   private final UserGroupInformation fsOwner;
   private final String supergroup;
   private final boolean standbyShouldCheckpoint;
+
+  /** Interval between each check for leases to release. */
+  private final long leaseRecheckIntervalMs;
+  /** Maximum time the write lock is held while releasing leases. */
+  private final long maxLockHoldToReleaseLeaseMs;
   // Scan interval is not configurable.
   private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
       TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
@@ -791,6 +800,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
           DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY,
           DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT);
+
+      this.leaseRecheckIntervalMs = conf.getLong(
+          DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
+          DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT);
+      this.maxLockHoldToReleaseLeaseMs = conf.getLong(
+          DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
+          DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
+
       // For testing purposes, allow the DT secret manager to be started regardless
       // of whether security is enabled.
       alwaysUseDelegationTokensForTests = conf.getBoolean(
@@ -834,6 +850,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return retryCache;
   }
 
+  @VisibleForTesting
+  public long getLeaseRecheckIntervalMs() {
+    return leaseRecheckIntervalMs;
+  }
+
+  @VisibleForTesting
+  public long getMaxLockHoldToReleaseLeaseMs() {
+    return maxLockHoldToReleaseLeaseMs;
+  }
+
   void lockRetryCache() {
     if (retryCache != null) {
       retryCache.lock();
@@ -3083,9 +3109,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     if(nrCompleteBlocks == nrBlocks) {
       finalizeINodeFileUnderConstruction(src, pendingFile,
           iip.getLatestSnapshotId(), false);
-      NameNode.stateChangeLog.warn("BLOCK*"
-          + " internalReleaseLease: All existing blocks are COMPLETE,"
-          + " lease removed, file closed.");
+      NameNode.stateChangeLog.warn("BLOCK*" +
+          " internalReleaseLease: All existing blocks are COMPLETE," +
+          " lease removed, file " + src + " closed.");
       return true;  // closed!
     }
@@ -3122,9 +3148,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         blockManager.checkMinReplication(lastBlock)) {
       finalizeINodeFileUnderConstruction(src, pendingFile,
           iip.getLatestSnapshotId(), false);
-      NameNode.stateChangeLog.warn("BLOCK*"
-          + " internalReleaseLease: Committed blocks are minimally replicated,"
-          + " lease removed, file closed.");
+      NameNode.stateChangeLog.warn("BLOCK*" +
+          " internalReleaseLease: Committed blocks are minimally" +
+          " replicated, lease removed, file " + src + " closed.");
       return true;  // closed!
     }
     // Cannot close file right now, since some blocks
@@ -3167,7 +3193,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       finalizeINodeFileUnderConstruction(src, pendingFile,
           iip.getLatestSnapshotId(), false);
       NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
-          + "Removed empty last block and closed file.");
+          + "Removed empty last block and closed file " + src);
       return true;
     }
     // start recovery of the last block for this file

LeaseManager.java

@@ -336,7 +336,7 @@ public class LeaseManager {
           }
         }
-        Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
+        Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs());
       } catch(InterruptedException ie) {
         if (LOG.isDebugEnabled()) {
           LOG.debug(name + " is interrupted", ie);
@@ -356,8 +356,11 @@ public class LeaseManager {
     boolean needSync = false;
     assert fsnamesystem.hasWriteLock();
 
-    while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()) {
-      Lease leaseToCheck = sortedLeases.poll();
+    long start = monotonicNow();
+
+    while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()
+        && !isMaxLockHoldToReleaseLease(start)) {
+      Lease leaseToCheck = sortedLeases.peek();
       LOG.info(leaseToCheck + " has expired hard limit");
 
       final List<Long> removing = new ArrayList<>();
@ -397,6 +400,11 @@ public class LeaseManager {
+ leaseToCheck, e); + leaseToCheck, e);
removing.add(id); removing.add(id);
} }
if (isMaxLockHoldToReleaseLease(start)) {
LOG.debug("Breaking out of checkLeases after " +
fsnamesystem.getMaxLockHoldToReleaseLeaseMs() + "ms.");
break;
}
} }
for(Long id : removing) { for(Long id : removing) {
@@ -407,6 +415,13 @@ public class LeaseManager {
     return needSync;
   }
 
+  /** @return true if max lock hold is reached */
+  private boolean isMaxLockHoldToReleaseLease(long start) {
+    return monotonicNow() - start >
+        fsnamesystem.getMaxLockHoldToReleaseLeaseMs();
+  }
+
   @Override
   public synchronized String toString() {
     return getClass().getSimpleName() + "= {"
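
With the defaults introduced here, each pass of checkLeases() holds the namesystem write lock for at most roughly 25 ms and the monitor then sleeps 2000 ms before the next pass, so while a large backlog of expired leases drains, lease release occupies the lock for only about 25 / 2025 ≈ 1.2% of the time instead of holding it until the backlog is empty.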

hdfs-default.xml

@@ -2601,6 +2601,24 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.lease-recheck-interval-ms</name>
+  <value>2000</value>
+  <description>The interval in milliseconds at which the namenode lease
+    monitor wakes up and checks for leases whose hard limit has expired.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.max-lock-hold-to-release-lease-ms</name>
+  <value>25</value>
+  <description>Expired leases are released while the namenode holds its
+    write lock, which blocks all other operations. To avoid blocking them
+    for too long, lease release stops after the lock has been held for this
+    many milliseconds and resumes on the next recheck.
+  </description>
+</property>
+
 <property>
   <name>dfs.namenode.startup.delay.block.deletion.sec</name>
   <value>0</value>
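
If these limits ever need to be adjusted in code rather than in hdfs-site.xml, the keys added to DFSConfigKeys above can be set on a Configuration directly. A minimal sketch follows; the values 1000 and 50 are arbitrary illustrations, not recommendations, and the class name is made up for the example.

// Illustrative only: override the new lease-release settings programmatically.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class LeaseTuningExample {
  public static Configuration tunedConf() {
    Configuration conf = new Configuration();
    // Check for expired leases every second instead of the 2000 ms default.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY, 1000L);
    // Let each release pass hold the write lock a little longer than the 25 ms default.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY, 50L);
    return conf;
  }
}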

TestLeaseManager.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 import static org.hamcrest.CoreMatchers.is;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
@@ -39,6 +40,8 @@ public class TestLeaseManager {
   @Rule
   public Timeout timeout = new Timeout(300000);
 
+  public static long maxLockHoldToReleaseLeaseMs = 100;
+
   @Test
   public void testRemoveLeases() throws Exception {
     FSNamesystem fsn = mock(FSNamesystem.class);
@@ -57,28 +60,28 @@ public class TestLeaseManager {
     assertEquals(0, lm.getINodeIdWithLeases().size());
   }
 
-  /** Check that even if LeaseManager.checkLease is not able to relinquish
-   * leases, the Namenode does't enter an infinite loop while holding the FSN
-   * write lock and thus become unresponsive
-   */
+  /** Check that LeaseManager.checkLease releases some leases. */
   @Test
-  public void testCheckLeaseNotInfiniteLoop() {
+  public void testCheckLease() {
     LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
+    long numLease = 100;
 
     //Make sure the leases we are going to add exceed the hard limit
     lm.setLeasePeriod(0, 0);
 
-    //Add some leases to the LeaseManager
-    lm.addLease("holder1", INodeId.ROOT_INODE_ID + 1);
-    lm.addLease("holder2", INodeId.ROOT_INODE_ID + 2);
-    lm.addLease("holder3", INodeId.ROOT_INODE_ID + 3);
-    assertEquals(lm.countLease(), 3);
+    for (long i = 0; i <= numLease - 1; i++) {
+      //Add some leases to the LeaseManager
+      lm.addLease("holder" + i, INodeId.ROOT_INODE_ID + i);
+    }
+    assertEquals(numLease, lm.countLease());
 
     //Initiate a call to checkLease. This should exit within the test timeout
     lm.checkLeases();
+    assertTrue(lm.countLease() < numLease);
   }
 
   @Test
   public void testCountPath() {
     LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
@@ -112,6 +115,7 @@ public class TestLeaseManager {
     when(fsn.isRunning()).thenReturn(true);
     when(fsn.hasWriteLock()).thenReturn(true);
     when(fsn.getFSDirectory()).thenReturn(dir);
+    when(fsn.getMaxLockHoldToReleaseLeaseMs()).thenReturn(maxLockHoldToReleaseLeaseMs);
     return fsn;
   }
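
To run just this test class outside a full Maven build, the plain JUnit 4 runner can be invoked programmatically. This is generic JUnit usage on the existing test class, not part of the patch; the wrapper class name is made up for the example.

// Run TestLeaseManager with the stock JUnit 4 runner and print a short summary.
import org.apache.hadoop.hdfs.server.namenode.TestLeaseManager;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;

public class RunTestLeaseManager {
  public static void main(String[] args) {
    Result result = JUnitCore.runClasses(TestLeaseManager.class);
    System.out.println(result.getRunCount() + " tests run, "
        + result.getFailureCount() + " failed");
  }
}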