HDFS-10220. A large number of expired leases can make namenode unresponsive and cause failover (Nicolas Fraison via raviprak)
(cherry picked from commit ae047655f4
)
This commit is contained in:
parent
9319665461
commit
9c5f7f290e
|
@ -374,6 +374,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||||
public static final int DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT = 16384;
|
public static final int DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT = 16384;
|
||||||
public static final int DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT = 32768;
|
public static final int DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT = 32768;
|
||||||
|
|
||||||
|
public static final String DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY =
|
||||||
|
"dfs.namenode.lease-recheck-interval-ms";
|
||||||
|
public static final long DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT =
|
||||||
|
2000;
|
||||||
|
public static final String
|
||||||
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY =
|
||||||
|
"dfs.namenode.max-lock-hold-to-release-lease-ms";
|
||||||
|
public static final long
|
||||||
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
|
||||||
|
|
||||||
public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
|
public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
|
||||||
public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
|
public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
|
||||||
|
|
||||||
|
|
|
@ -354,7 +354,6 @@ public interface HdfsServerConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
String NAMENODE_LEASE_HOLDER = "HDFS_NameNode";
|
String NAMENODE_LEASE_HOLDER = "HDFS_NameNode";
|
||||||
long NAMENODE_LEASE_RECHECK_INTERVAL = 2000;
|
|
||||||
|
|
||||||
String CRYPTO_XATTR_ENCRYPTION_ZONE =
|
String CRYPTO_XATTR_ENCRYPTION_ZONE =
|
||||||
"raw.hdfs.crypto.encryption.zone";
|
"raw.hdfs.crypto.encryption.zone";
|
||||||
|
|
|
@ -76,6 +76,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPI
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
|
||||||
|
@ -372,7 +376,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
private final UserGroupInformation fsOwner;
|
private final UserGroupInformation fsOwner;
|
||||||
private final String supergroup;
|
private final String supergroup;
|
||||||
private final boolean standbyShouldCheckpoint;
|
private final boolean standbyShouldCheckpoint;
|
||||||
|
|
||||||
|
/** Interval between each check of lease to release. */
|
||||||
|
private final long leaseRecheckIntervalMs;
|
||||||
|
/** Maximum time, in milliseconds, the lock is held while releasing leases. */
|
||||||
|
private final long maxLockHoldToReleaseLeaseMs;
|
||||||
|
|
||||||
// Scan interval is not configurable.
|
// Scan interval is not configurable.
|
||||||
private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
|
private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
|
||||||
TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
|
TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
|
||||||
|
@ -791,6 +800,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY,
|
DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY,
|
||||||
DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT);
|
DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT);
|
||||||
|
|
||||||
|
this.leaseRecheckIntervalMs = conf.getLong(
|
||||||
|
DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
|
||||||
|
DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT);
|
||||||
|
this.maxLockHoldToReleaseLeaseMs = conf.getLong(
|
||||||
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
|
||||||
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
|
||||||
|
|
||||||
// For testing purposes, allow the DT secret manager to be started regardless
|
// For testing purposes, allow the DT secret manager to be started regardless
|
||||||
// of whether security is enabled.
|
// of whether security is enabled.
|
||||||
alwaysUseDelegationTokensForTests = conf.getBoolean(
|
alwaysUseDelegationTokensForTests = conf.getBoolean(
|
||||||
|
@ -834,6 +850,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
return retryCache;
|
return retryCache;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the configured lease recheck interval, read from
 * dfs.namenode.lease-recheck-interval-ms at construction time.
 * LeaseManager sleeps for this many milliseconds between lease checks.
 *
 * @return the interval between two lease checks, in milliseconds
 */
@VisibleForTesting
|
||||||
|
public long getLeaseRecheckIntervalMs() {
|
||||||
|
return leaseRecheckIntervalMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the configured time budget, read from
 * dfs.namenode.max-lock-hold-to-release-lease-ms at construction time.
 * LeaseManager stops releasing expired leases once this much time has
 * elapsed in a single pass.
 *
 * @return the maximum time spent releasing leases, in milliseconds
 */
@VisibleForTesting
|
||||||
|
public long getMaxLockHoldToReleaseLeaseMs() {
|
||||||
|
return maxLockHoldToReleaseLeaseMs;
|
||||||
|
}
|
||||||
|
|
||||||
void lockRetryCache() {
|
void lockRetryCache() {
|
||||||
if (retryCache != null) {
|
if (retryCache != null) {
|
||||||
retryCache.lock();
|
retryCache.lock();
|
||||||
|
@ -3083,9 +3109,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
if(nrCompleteBlocks == nrBlocks) {
|
if(nrCompleteBlocks == nrBlocks) {
|
||||||
finalizeINodeFileUnderConstruction(src, pendingFile,
|
finalizeINodeFileUnderConstruction(src, pendingFile,
|
||||||
iip.getLatestSnapshotId(), false);
|
iip.getLatestSnapshotId(), false);
|
||||||
NameNode.stateChangeLog.warn("BLOCK*"
|
NameNode.stateChangeLog.warn("BLOCK*" +
|
||||||
+ " internalReleaseLease: All existing blocks are COMPLETE,"
|
" internalReleaseLease: All existing blocks are COMPLETE," +
|
||||||
+ " lease removed, file closed.");
|
" lease removed, file " + src + " closed.");
|
||||||
return true; // closed!
|
return true; // closed!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3122,9 +3148,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
blockManager.checkMinReplication(lastBlock)) {
|
blockManager.checkMinReplication(lastBlock)) {
|
||||||
finalizeINodeFileUnderConstruction(src, pendingFile,
|
finalizeINodeFileUnderConstruction(src, pendingFile,
|
||||||
iip.getLatestSnapshotId(), false);
|
iip.getLatestSnapshotId(), false);
|
||||||
NameNode.stateChangeLog.warn("BLOCK*"
|
NameNode.stateChangeLog.warn("BLOCK*" +
|
||||||
+ " internalReleaseLease: Committed blocks are minimally replicated,"
|
" internalReleaseLease: Committed blocks are minimally" +
|
||||||
+ " lease removed, file closed.");
|
" replicated, lease removed, file " + src + " closed.");
|
||||||
return true; // closed!
|
return true; // closed!
|
||||||
}
|
}
|
||||||
// Cannot close file right now, since some blocks
|
// Cannot close file right now, since some blocks
|
||||||
|
@ -3167,7 +3193,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
finalizeINodeFileUnderConstruction(src, pendingFile,
|
finalizeINodeFileUnderConstruction(src, pendingFile,
|
||||||
iip.getLatestSnapshotId(), false);
|
iip.getLatestSnapshotId(), false);
|
||||||
NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
|
NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
|
||||||
+ "Removed empty last block and closed file.");
|
+ "Removed empty last block and closed file " + src);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// start recovery of the last block for this file
|
// start recovery of the last block for this file
|
||||||
|
|
|
@ -336,7 +336,7 @@ public class LeaseManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
|
Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs());
|
||||||
} catch(InterruptedException ie) {
|
} catch(InterruptedException ie) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug(name + " is interrupted", ie);
|
LOG.debug(name + " is interrupted", ie);
|
||||||
|
@ -356,8 +356,11 @@ public class LeaseManager {
|
||||||
boolean needSync = false;
|
boolean needSync = false;
|
||||||
assert fsnamesystem.hasWriteLock();
|
assert fsnamesystem.hasWriteLock();
|
||||||
|
|
||||||
while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()) {
|
long start = monotonicNow();
|
||||||
Lease leaseToCheck = sortedLeases.poll();
|
|
||||||
|
while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()
|
||||||
|
&& !isMaxLockHoldToReleaseLease(start)) {
|
||||||
|
Lease leaseToCheck = sortedLeases.peek();
|
||||||
LOG.info(leaseToCheck + " has expired hard limit");
|
LOG.info(leaseToCheck + " has expired hard limit");
|
||||||
|
|
||||||
final List<Long> removing = new ArrayList<>();
|
final List<Long> removing = new ArrayList<>();
|
||||||
|
@ -397,6 +400,11 @@ public class LeaseManager {
|
||||||
+ leaseToCheck, e);
|
+ leaseToCheck, e);
|
||||||
removing.add(id);
|
removing.add(id);
|
||||||
}
|
}
|
||||||
|
if (isMaxLockHoldToReleaseLease(start)) {
|
||||||
|
LOG.debug("Breaking out of checkLeases after " +
|
||||||
|
fsnamesystem.getMaxLockHoldToReleaseLeaseMs() + "ms.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(Long id : removing) {
|
for(Long id : removing) {
|
||||||
|
@ -407,6 +415,13 @@ public class LeaseManager {
|
||||||
return needSync;
|
return needSync;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Checks whether the time budget for releasing leases has been used up.
 *
 * @param start value of monotonicNow() captured when the lease check began
 * @return true if the time elapsed since start is strictly greater than
 *         fsnamesystem.getMaxLockHoldToReleaseLeaseMs()
 */
|
||||||
|
private boolean isMaxLockHoldToReleaseLease(long start) {
|
||||||
|
return monotonicNow() - start >
|
||||||
|
fsnamesystem.getMaxLockHoldToReleaseLeaseMs();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized String toString() {
|
public synchronized String toString() {
|
||||||
return getClass().getSimpleName() + "= {"
|
return getClass().getSimpleName() + "= {"
|
||||||
|
|
|
@ -2601,6 +2601,24 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.lease-recheck-interval-ms</name>
|
||||||
|
<value>2000</value>
|
||||||
|
<description>The interval, in milliseconds, between two successive
|
||||||
|
checks made by the namenode for expired leases that need to be
|
||||||
|
released.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.max-lock-hold-to-release-lease-ms</name>
|
||||||
|
<value>25</value>
|
||||||
|
<description>While leases are being released a lock is held that blocks
|
||||||
|
all other operations on the namenode. In order not to block them for
|
||||||
|
too long, we stop releasing leases once this limit (in ms) is reached.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
|
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
|
||||||
<value>0</value>
|
<value>0</value>
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import static org.hamcrest.CoreMatchers.is;
|
import static org.hamcrest.CoreMatchers.is;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertNull;
|
import static org.junit.Assert.assertNull;
|
||||||
|
|
||||||
|
@ -39,6 +40,8 @@ public class TestLeaseManager {
|
||||||
@Rule
|
@Rule
|
||||||
public Timeout timeout = new Timeout(300000);
|
public Timeout timeout = new Timeout(300000);
|
||||||
|
|
||||||
|
public static long maxLockHoldToReleaseLeaseMs = 100;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRemoveLeases() throws Exception {
|
public void testRemoveLeases() throws Exception {
|
||||||
FSNamesystem fsn = mock(FSNamesystem.class);
|
FSNamesystem fsn = mock(FSNamesystem.class);
|
||||||
|
@ -57,28 +60,28 @@ public class TestLeaseManager {
|
||||||
assertEquals(0, lm.getINodeIdWithLeases().size());
|
assertEquals(0, lm.getINodeIdWithLeases().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Check that even if LeaseManager.checkLease is not able to relinquish
|
/** Check that LeaseManager.checkLeases releases at least some expired leases
|
||||||
* leases, the Namenode does't enter an infinite loop while holding the FSN
|
|
||||||
* write lock and thus become unresponsive
|
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testCheckLeaseNotInfiniteLoop() {
|
public void testCheckLease() {
|
||||||
LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
|
LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
|
||||||
|
|
||||||
|
long numLease = 100;
|
||||||
|
|
||||||
//Make sure the leases we are going to add exceed the hard limit
|
//Make sure the leases we are going to add exceed the hard limit
|
||||||
lm.setLeasePeriod(0, 0);
|
lm.setLeasePeriod(0, 0);
|
||||||
|
|
||||||
//Add some leases to the LeaseManager
|
for (long i = 0; i <= numLease - 1; i++) {
|
||||||
lm.addLease("holder1", INodeId.ROOT_INODE_ID + 1);
|
//Add some leases to the LeaseManager
|
||||||
lm.addLease("holder2", INodeId.ROOT_INODE_ID + 2);
|
lm.addLease("holder"+i, INodeId.ROOT_INODE_ID + i);
|
||||||
lm.addLease("holder3", INodeId.ROOT_INODE_ID + 3);
|
}
|
||||||
assertEquals(lm.countLease(), 3);
|
assertEquals(numLease, lm.countLease());
|
||||||
|
|
||||||
//Initiate a call to checkLease. This should exit within the test timeout
|
//Initiate a call to checkLease. This should exit within the test timeout
|
||||||
lm.checkLeases();
|
lm.checkLeases();
|
||||||
|
assertTrue(lm.countLease() < numLease);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCountPath() {
|
public void testCountPath() {
|
||||||
LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
|
LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
|
||||||
|
@ -112,6 +115,7 @@ public class TestLeaseManager {
|
||||||
when(fsn.isRunning()).thenReturn(true);
|
when(fsn.isRunning()).thenReturn(true);
|
||||||
when(fsn.hasWriteLock()).thenReturn(true);
|
when(fsn.hasWriteLock()).thenReturn(true);
|
||||||
when(fsn.getFSDirectory()).thenReturn(dir);
|
when(fsn.getFSDirectory()).thenReturn(dir);
|
||||||
|
when(fsn.getMaxLockHoldToReleaseLeaseMs()).thenReturn(maxLockHoldToReleaseLeaseMs);
|
||||||
return fsn;
|
return fsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue