HDFS-4186. logSync() is called with the write lock held while releasing lease (Kihwal Lee via daryn)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1409988 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ce2067fb15
commit
298eb42657
|
@ -2026,6 +2026,9 @@ Release 0.23.5 - UNRELEASED
|
||||||
|
|
||||||
HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby)
|
HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby)
|
||||||
|
|
||||||
|
HDFS-4186. logSync() is called with the write lock held while releasing
|
||||||
|
lease (Kihwal Lee via daryn)
|
||||||
|
|
||||||
Release 0.23.4
|
Release 0.23.4
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -1720,16 +1720,25 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
short replication, long blockSize) throws AccessControlException,
|
short replication, long blockSize) throws AccessControlException,
|
||||||
SafeModeException, FileAlreadyExistsException, UnresolvedLinkException,
|
SafeModeException, FileAlreadyExistsException, UnresolvedLinkException,
|
||||||
FileNotFoundException, ParentNotDirectoryException, IOException {
|
FileNotFoundException, ParentNotDirectoryException, IOException {
|
||||||
|
boolean skipSync = false;
|
||||||
writeLock();
|
writeLock();
|
||||||
try {
|
try {
|
||||||
checkOperation(OperationCategory.WRITE);
|
checkOperation(OperationCategory.WRITE);
|
||||||
|
|
||||||
startFileInternal(src, permissions, holder, clientMachine, flag,
|
startFileInternal(src, permissions, holder, clientMachine, flag,
|
||||||
createParent, replication, blockSize);
|
createParent, replication, blockSize);
|
||||||
|
} catch (StandbyException se) {
|
||||||
|
skipSync = true;
|
||||||
|
throw se;
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
|
// There might be transactions logged while trying to recover the lease.
|
||||||
|
// They need to be sync'ed even when an exception was thrown.
|
||||||
|
if (!skipSync) {
|
||||||
|
getEditLog().logSync();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
getEditLog().logSync();
|
|
||||||
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
|
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
|
||||||
final HdfsFileStatus stat = dir.getFileInfo(src, false);
|
final HdfsFileStatus stat = dir.getFileInfo(src, false);
|
||||||
logAuditEvent(UserGroupInformation.getCurrentUser(),
|
logAuditEvent(UserGroupInformation.getCurrentUser(),
|
||||||
|
@ -1910,6 +1919,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
*/
|
*/
|
||||||
boolean recoverLease(String src, String holder, String clientMachine)
|
boolean recoverLease(String src, String holder, String clientMachine)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
boolean skipSync = false;
|
||||||
writeLock();
|
writeLock();
|
||||||
try {
|
try {
|
||||||
checkOperation(OperationCategory.WRITE);
|
checkOperation(OperationCategory.WRITE);
|
||||||
|
@ -1931,8 +1941,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
}
|
}
|
||||||
|
|
||||||
recoverLeaseInternal(inode, src, holder, clientMachine, true);
|
recoverLeaseInternal(inode, src, holder, clientMachine, true);
|
||||||
|
} catch (StandbyException se) {
|
||||||
|
skipSync = true;
|
||||||
|
throw se;
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
|
// There might be transactions logged while trying to recover the lease.
|
||||||
|
// They need to be sync'ed even when an exception was thrown.
|
||||||
|
if (!skipSync) {
|
||||||
|
getEditLog().logSync();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -2035,6 +2053,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
throws AccessControlException, SafeModeException,
|
throws AccessControlException, SafeModeException,
|
||||||
FileAlreadyExistsException, FileNotFoundException,
|
FileAlreadyExistsException, FileNotFoundException,
|
||||||
ParentNotDirectoryException, IOException {
|
ParentNotDirectoryException, IOException {
|
||||||
|
boolean skipSync = false;
|
||||||
if (!supportAppends) {
|
if (!supportAppends) {
|
||||||
throw new UnsupportedOperationException(
|
throw new UnsupportedOperationException(
|
||||||
"Append is not enabled on this NameNode. Use the " +
|
"Append is not enabled on this NameNode. Use the " +
|
||||||
|
@ -2048,10 +2067,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
lb = startFileInternal(src, null, holder, clientMachine,
|
lb = startFileInternal(src, null, holder, clientMachine,
|
||||||
EnumSet.of(CreateFlag.APPEND),
|
EnumSet.of(CreateFlag.APPEND),
|
||||||
false, blockManager.maxReplication, 0);
|
false, blockManager.maxReplication, 0);
|
||||||
|
} catch (StandbyException se) {
|
||||||
|
skipSync = true;
|
||||||
|
throw se;
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
|
// There might be transactions logged while trying to recover the lease.
|
||||||
|
// They need to be sync'ed even when an exception was thrown.
|
||||||
|
if (!skipSync) {
|
||||||
|
getEditLog().logSync();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
getEditLog().logSync();
|
|
||||||
if (lb != null) {
|
if (lb != null) {
|
||||||
if (NameNode.stateChangeLog.isDebugEnabled()) {
|
if (NameNode.stateChangeLog.isDebugEnabled()) {
|
||||||
NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
|
NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
|
||||||
|
@ -2983,7 +3009,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
* RecoveryInProgressException if lease recovery is in progress.<br>
|
* RecoveryInProgressException if lease recovery is in progress.<br>
|
||||||
* IOException in case of an error.
|
* IOException in case of an error.
|
||||||
* @return true if file has been successfully finalized and closed or
|
* @return true if file has been successfully finalized and closed or
|
||||||
* false if block recovery has been initiated
|
* false if block recovery has been initiated. Since the lease owner
|
||||||
|
* has been changed and logged, caller should call logSync().
|
||||||
*/
|
*/
|
||||||
boolean internalReleaseLease(Lease lease, String src,
|
boolean internalReleaseLease(Lease lease, String src,
|
||||||
String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
|
String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
|
||||||
|
@ -3104,6 +3131,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
assert hasWriteLock();
|
assert hasWriteLock();
|
||||||
if(newHolder == null)
|
if(newHolder == null)
|
||||||
return lease;
|
return lease;
|
||||||
|
// The following transaction is not synced. Make sure it's sync'ed later.
|
||||||
logReassignLease(lease.getHolder(), src, newHolder);
|
logReassignLease(lease.getHolder(), src, newHolder);
|
||||||
return reassignLeaseInternal(lease, src, newHolder, pendingFile);
|
return reassignLeaseInternal(lease, src, newHolder, pendingFile);
|
||||||
}
|
}
|
||||||
|
@ -5203,13 +5231,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
|
|
||||||
private void logReassignLease(String leaseHolder, String src,
|
private void logReassignLease(String leaseHolder, String src,
|
||||||
String newHolder) {
|
String newHolder) {
|
||||||
writeLock();
|
assert hasWriteLock();
|
||||||
try {
|
getEditLog().logReassignLease(leaseHolder, src, newHolder);
|
||||||
getEditLog().logReassignLease(leaseHolder, src, newHolder);
|
|
||||||
} finally {
|
|
||||||
writeUnlock();
|
|
||||||
}
|
|
||||||
getEditLog().logSync();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -401,17 +401,21 @@ public class LeaseManager {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
for(; shouldRunMonitor && fsnamesystem.isRunning(); ) {
|
for(; shouldRunMonitor && fsnamesystem.isRunning(); ) {
|
||||||
|
boolean needSync = false;
|
||||||
try {
|
try {
|
||||||
fsnamesystem.writeLockInterruptibly();
|
fsnamesystem.writeLockInterruptibly();
|
||||||
try {
|
try {
|
||||||
if (!fsnamesystem.isInSafeMode()) {
|
if (!fsnamesystem.isInSafeMode()) {
|
||||||
checkLeases();
|
needSync = checkLeases();
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
fsnamesystem.writeUnlock();
|
fsnamesystem.writeUnlock();
|
||||||
|
// lease reassignments should to be sync'ed.
|
||||||
|
if (needSync) {
|
||||||
|
fsnamesystem.getEditLog().logSync();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
|
Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
|
||||||
} catch(InterruptedException ie) {
|
} catch(InterruptedException ie) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
|
@ -422,13 +426,16 @@ public class LeaseManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Check the leases beginning from the oldest. */
|
/** Check the leases beginning from the oldest.
|
||||||
private synchronized void checkLeases() {
|
* @return true is sync is needed.
|
||||||
|
*/
|
||||||
|
private synchronized boolean checkLeases() {
|
||||||
|
boolean needSync = false;
|
||||||
assert fsnamesystem.hasWriteLock();
|
assert fsnamesystem.hasWriteLock();
|
||||||
for(; sortedLeases.size() > 0; ) {
|
for(; sortedLeases.size() > 0; ) {
|
||||||
final Lease oldest = sortedLeases.first();
|
final Lease oldest = sortedLeases.first();
|
||||||
if (!oldest.expiredHardLimit()) {
|
if (!oldest.expiredHardLimit()) {
|
||||||
return;
|
return needSync;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info(oldest + " has expired hard limit");
|
LOG.info(oldest + " has expired hard limit");
|
||||||
|
@ -451,6 +458,10 @@ public class LeaseManager {
|
||||||
LOG.debug("Started block recovery " + p + " lease " + oldest);
|
LOG.debug("Started block recovery " + p + " lease " + oldest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// If a lease recovery happened, we need to sync later.
|
||||||
|
if (!needSync && !completed) {
|
||||||
|
needSync = true;
|
||||||
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("Cannot release the path " + p + " in the lease "
|
LOG.error("Cannot release the path " + p + " in the lease "
|
||||||
+ oldest, e);
|
+ oldest, e);
|
||||||
|
@ -462,6 +473,7 @@ public class LeaseManager {
|
||||||
removeLease(oldest, p);
|
removeLease(oldest, p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return needSync;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -447,7 +447,7 @@ public class TestEditLog {
|
||||||
|
|
||||||
// Now ask to sync edit from B, which should sync both edits.
|
// Now ask to sync edit from B, which should sync both edits.
|
||||||
doCallLogSync(threadB, editLog);
|
doCallLogSync(threadB, editLog);
|
||||||
assertEquals("logSync from second thread should bump txid up to 2",
|
assertEquals("logSync from second thread should bump txid up to 3",
|
||||||
3, editLog.getSyncTxId());
|
3, editLog.getSyncTxId());
|
||||||
|
|
||||||
// Now ask to sync edit from A, which was already batched in - thus
|
// Now ask to sync edit from A, which was already batched in - thus
|
||||||
|
|
Loading…
Reference in New Issue