HDFS-10220. A large number of expired leases can make namenode unresponsive and cause failover (Nicolas Fraison via raviprak)
(cherry picked from commit ae047655f4)
(cherry picked from commit 9c5f7f290e)
parent c5dafb4ad7
commit 588ea6d431
DFSConfigKeys.java
@@ -365,6 +365,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT = 16384;
   public static final int DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT = 32768;
 
+  public static final String DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY =
+      "dfs.namenode.lease-recheck-interval-ms";
+  public static final long DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT =
+      2000;
+  public static final String
+      DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY =
+      "dfs.namenode.max-lock-hold-to-release-lease-ms";
+  public static final long
+      DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
+
   public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
   public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
 
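The two new keys are ordinary Hadoop configuration properties, so any code holding a Configuration can read them the same way the FSNamesystem constructor does later in this patch. A minimal sketch, assuming only the public Configuration.getLong API; the standalone class name and the printout are illustrative and not part of the patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

// Illustrative only: read the two new lease-related settings, falling back
// to the compiled-in defaults added above, exactly as conf.getLong(key,
// default) is used in FSNamesystem further down in this commit.
public class LeaseSettingsProbe {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    long recheckMs = conf.getLong(
        DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
        DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT);
    long maxLockHoldMs = conf.getLong(
        DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
    System.out.println("lease recheck interval = " + recheckMs + " ms, "
        + "max lock hold to release leases = " + maxLockHoldMs + " ms");
  }
}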
HdfsServerConstants.java
@@ -355,7 +355,6 @@ public interface HdfsServerConstants {
   }
 
   String NAMENODE_LEASE_HOLDER = "HDFS_NameNode";
-  long NAMENODE_LEASE_RECHECK_INTERVAL = 2000;
 
   String CRYPTO_XATTR_ENCRYPTION_ZONE =
       "raw.hdfs.crypto.encryption.zone";
 
FSNamesystem.java
@@ -82,6 +82,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DAT
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
@@ -383,7 +387,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   private final UserGroupInformation fsOwner;
   private final String supergroup;
   private final boolean standbyShouldCheckpoint;
 
+  /** Interval between each check of lease to release. */
+  private final long leaseRecheckIntervalMs;
+  /** Maximum time the lock is held to release lease. */
+  private final long maxLockHoldToReleaseLeaseMs;
+
   // Scan interval is not configurable.
   private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
       TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
@@ -819,6 +828,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY,
         DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT);
 
+    this.leaseRecheckIntervalMs = conf.getLong(
+        DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
+        DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT);
+    this.maxLockHoldToReleaseLeaseMs = conf.getLong(
+        DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
+        DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
+
     // For testing purposes, allow the DT secret manager to be started regardless
     // of whether security is enabled.
     alwaysUseDelegationTokensForTests = conf.getBoolean(
@@ -863,6 +879,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return retryCache;
   }
 
+  @VisibleForTesting
+  public long getLeaseRecheckIntervalMs() {
+    return leaseRecheckIntervalMs;
+  }
+
+  @VisibleForTesting
+  public long getMaxLockHoldToReleaseLeaseMs() {
+    return maxLockHoldToReleaseLeaseMs;
+  }
+
   void lockRetryCache() {
     if (retryCache != null) {
       retryCache.lock();
@@ -3120,9 +3146,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     if(nrCompleteBlocks == nrBlocks) {
       finalizeINodeFileUnderConstruction(src, pendingFile,
           iip.getLatestSnapshotId(), false);
-      NameNode.stateChangeLog.warn("BLOCK*"
-          + " internalReleaseLease: All existing blocks are COMPLETE,"
-          + " lease removed, file closed.");
+      NameNode.stateChangeLog.warn("BLOCK*" +
+          " internalReleaseLease: All existing blocks are COMPLETE," +
+          " lease removed, file " + src + " closed.");
       return true; // closed!
     }
 
@@ -3159,9 +3185,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         blockManager.checkMinReplication(lastBlock)) {
       finalizeINodeFileUnderConstruction(src, pendingFile,
           iip.getLatestSnapshotId(), false);
-      NameNode.stateChangeLog.warn("BLOCK*"
-          + " internalReleaseLease: Committed blocks are minimally replicated,"
-          + " lease removed, file closed.");
+      NameNode.stateChangeLog.warn("BLOCK*" +
+          " internalReleaseLease: Committed blocks are minimally" +
+          " replicated, lease removed, file " + src + " closed.");
       return true; // closed!
     }
     // Cannot close file right now, since some blocks
@@ -3204,7 +3230,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       finalizeINodeFileUnderConstruction(src, pendingFile,
           iip.getLatestSnapshotId(), false);
       NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
-          + "Removed empty last block and closed file.");
+          + "Removed empty last block and closed file " + src);
       return true;
     }
     // start recovery of the last block for this file
LeaseManager.java
@@ -336,7 +336,7 @@ public class LeaseManager {
             }
           }
 
-          Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
+          Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs());
         } catch(InterruptedException ie) {
           if (LOG.isDebugEnabled()) {
             LOG.debug(name + " is interrupted", ie);
@@ -356,8 +356,11 @@ public class LeaseManager {
     boolean needSync = false;
     assert fsnamesystem.hasWriteLock();
 
-    while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()) {
-      Lease leaseToCheck = sortedLeases.poll();
+    long start = monotonicNow();
+
+    while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()
+      && !isMaxLockHoldToReleaseLease(start)) {
+      Lease leaseToCheck = sortedLeases.peek();
       LOG.info(leaseToCheck + " has expired hard limit");
 
       final List<Long> removing = new ArrayList<>();
@@ -397,6 +400,11 @@ public class LeaseManager {
               + leaseToCheck, e);
           removing.add(id);
         }
+        if (isMaxLockHoldToReleaseLease(start)) {
+          LOG.debug("Breaking out of checkLeases after " +
+              fsnamesystem.getMaxLockHoldToReleaseLeaseMs() + "ms.");
+          break;
+        }
       }
 
       for(Long id : removing) {
@@ -407,6 +415,13 @@ public class LeaseManager {
     return needSync;
   }
 
+
+  /** @return true if max lock hold is reached */
+  private boolean isMaxLockHoldToReleaseLease(long start) {
+    return monotonicNow() - start >
+        fsnamesystem.getMaxLockHoldToReleaseLeaseMs();
+  }
+
   @Override
   public synchronized String toString() {
     return getClass().getSimpleName() + "= {"
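The heart of the fix is the pattern visible in the three LeaseManager hunks above: record a monotonic timestamp before entering the lease-release loops, then break out of both the outer lease loop and the inner per-file loop once the configured lock-hold budget is exceeded, leaving the remaining expired leases for the next Monitor pass. The following is a generic, self-contained sketch of that time-budgeted pattern under stated assumptions: the queue of Runnables, the TimeBudgetedBatch class name, and the 25 ms constant are illustrative stand-ins, not HDFS code.

import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.TimeUnit;

// Illustrative sketch of the time-budgeted batch pattern used by
// LeaseManager.checkLeases(): process items only while a time budget
// remains, then stop and let a later pass pick up the rest.
public class TimeBudgetedBatch {
  private static final long MAX_HOLD_MS = 25; // mirrors the 25 ms default above

  static int drainWithBudget(Queue<Runnable> work) {
    final long start = System.nanoTime();   // monotonic clock, like monotonicNow()
    int processed = 0;
    while (!work.isEmpty() && !budgetExceeded(start)) {
      work.poll().run();
      processed++;
    }
    return processed;                       // unprocessed items wait for the next pass
  }

  private static boolean budgetExceeded(long startNanos) {
    return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos) > MAX_HOLD_MS;
  }

  public static void main(String[] args) {
    Queue<Runnable> work = new ArrayDeque<>();
    for (int i = 0; i < 1000; i++) {
      work.add(() -> { /* stand-in for releasing one expired lease */ });
    }
    System.out.println("processed " + drainWithBudget(work) + " items within the budget");
  }
}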
hdfs-default.xml
@@ -2594,6 +2594,24 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.lease-recheck-interval-ms</name>
+  <value>2000</value>
+  <description>The interval in milliseconds at which the namenode rechecks
+    leases in order to release those whose hard limit has expired.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.max-lock-hold-to-release-lease-ms</name>
+  <value>25</value>
+  <description>During the release of leases the namesystem write lock is held,
+    which blocks other namenode operations. To avoid stalling them for too
+    long, lease release stops once the lock has been held for this many
+    milliseconds.
+  </description>
+</property>
+
 <property>
   <name>dfs.namenode.startup.delay.block.deletion.sec</name>
   <value>0</value>
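Operators override these defaults in hdfs-site.xml rather than editing hdfs-default.xml. A sketch of such an override; the values below are arbitrary examples chosen for illustration, not recommendations from this patch:

<!-- hdfs-site.xml: example overrides, values for illustration only -->
<configuration>
  <property>
    <name>dfs.namenode.lease-recheck-interval-ms</name>
    <value>5000</value>
  </property>
  <property>
    <name>dfs.namenode.max-lock-hold-to-release-lease-ms</name>
    <value>50</value>
  </property>
</configuration>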
TestLeaseManager.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import static org.hamcrest.CoreMatchers.is;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
@@ -39,6 +40,8 @@ public class TestLeaseManager {
   @Rule
   public Timeout timeout = new Timeout(300000);
 
+  public static long maxLockHoldToReleaseLeaseMs = 100;
+
   @Test
   public void testRemoveLeases() throws Exception {
     FSNamesystem fsn = mock(FSNamesystem.class);
@@ -57,28 +60,28 @@ public class TestLeaseManager {
     assertEquals(0, lm.getINodeIdWithLeases().size());
   }
 
-  /** Check that even if LeaseManager.checkLease is not able to relinquish
-   * leases, the Namenode does't enter an infinite loop while holding the FSN
-   * write lock and thus become unresponsive
+  /** Check that LeaseManager.checkLease releases some leases
    */
   @Test
-  public void testCheckLeaseNotInfiniteLoop() {
+  public void testCheckLease() {
     LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
 
+    long numLease = 100;
+
     //Make sure the leases we are going to add exceed the hard limit
     lm.setLeasePeriod(0, 0);
 
-    //Add some leases to the LeaseManager
-    lm.addLease("holder1", INodeId.ROOT_INODE_ID + 1);
-    lm.addLease("holder2", INodeId.ROOT_INODE_ID + 2);
-    lm.addLease("holder3", INodeId.ROOT_INODE_ID + 3);
-    assertEquals(lm.countLease(), 3);
+    for (long i = 0; i <= numLease - 1; i++) {
+      //Add some leases to the LeaseManager
+      lm.addLease("holder"+i, INodeId.ROOT_INODE_ID + i);
+    }
+    assertEquals(numLease, lm.countLease());
 
     //Initiate a call to checkLease. This should exit within the test timeout
     lm.checkLeases();
+    assertTrue(lm.countLease() < numLease);
   }
 
 
   @Test
   public void testCountPath() {
     LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
@@ -112,6 +115,7 @@ public class TestLeaseManager {
     when(fsn.isRunning()).thenReturn(true);
     when(fsn.hasWriteLock()).thenReturn(true);
     when(fsn.getFSDirectory()).thenReturn(dir);
+    when(fsn.getMaxLockHoldToReleaseLeaseMs()).thenReturn(maxLockHoldToReleaseLeaseMs);
     return fsn;
   }
 