From 0cb4b2039d263fee6772cb3136707055a1157768 Mon Sep 17 00:00:00 2001 From: Daryn Sharp Date: Thu, 15 Nov 2012 20:35:16 +0000 Subject: [PATCH] svn merge -c 1409988 FIXES: HDFS-4186 logSync() is called with the write lock held while releasing lease (Kihwal Lee via daryn) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1409992 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../hdfs/server/namenode/FSNamesystem.java | 45 ++++++++++++++----- .../hdfs/server/namenode/LeaseManager.java | 22 ++++++--- .../hdfs/server/namenode/TestEditLog.java | 2 +- 4 files changed, 55 insertions(+), 17 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index eb64472db93..7937d45c136 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1746,6 +1746,9 @@ Release 0.23.5 - UNRELEASED HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby) + HDFS-4186. logSync() is called with the write lock held while releasing + lease (Kihwal Lee via daryn) + Release 0.23.4 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index d11a1619a6b..b48a6b46ec6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1704,16 +1704,25 @@ public class FSNamesystem implements Namesystem, FSClusterStats, short replication, long blockSize) throws AccessControlException, SafeModeException, FileAlreadyExistsException, UnresolvedLinkException, FileNotFoundException, ParentNotDirectoryException, IOException { + boolean skipSync = false; writeLock(); try { checkOperation(OperationCategory.WRITE); startFileInternal(src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); + } catch (StandbyException se) { + skipSync = true; + throw se; } finally { writeUnlock(); - } - getEditLog().logSync(); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { + getEditLog().logSync(); + } + } + if (auditLog.isInfoEnabled() && isExternalInvocation()) { final HdfsFileStatus stat = dir.getFileInfo(src, false); logAuditEvent(UserGroupInformation.getCurrentUser(), @@ -1894,6 +1903,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ boolean recoverLease(String src, String holder, String clientMachine) throws IOException { + boolean skipSync = false; writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1915,8 +1925,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } recoverLeaseInternal(inode, src, holder, clientMachine, true); + } catch (StandbyException se) { + skipSync = true; + throw se; } finally { writeUnlock(); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { + getEditLog().logSync(); + } } return false; } @@ -2019,6 +2037,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, SafeModeException, FileAlreadyExistsException, FileNotFoundException, ParentNotDirectoryException, IOException { + boolean skipSync = false; if (!supportAppends) { throw new UnsupportedOperationException( "Append is not enabled on this NameNode. Use the " + @@ -2032,10 +2051,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, lb = startFileInternal(src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), false, blockManager.maxReplication, 0); + } catch (StandbyException se) { + skipSync = true; + throw se; } finally { writeUnlock(); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { + getEditLog().logSync(); + } } - getEditLog().logSync(); if (lb != null) { if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file " @@ -2959,7 +2985,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * RecoveryInProgressException if lease recovery is in progress.
* IOException in case of an error. * @return true if file has been successfully finalized and closed or - * false if block recovery has been initiated + * false if block recovery has been initiated. Since the lease owner + * has been changed and logged, caller should call logSync(). */ boolean internalReleaseLease(Lease lease, String src, String recoveryLeaseHolder) throws AlreadyBeingCreatedException, @@ -3080,6 +3107,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, assert hasWriteLock(); if(newHolder == null) return lease; + // The following transaction is not synced. Make sure it's sync'ed later. logReassignLease(lease.getHolder(), src, newHolder); return reassignLeaseInternal(lease, src, newHolder, pendingFile); } @@ -5179,13 +5207,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void logReassignLease(String leaseHolder, String src, String newHolder) { - writeLock(); - try { - getEditLog().logReassignLease(leaseHolder, src, newHolder); - } finally { - writeUnlock(); - } - getEditLog().logSync(); + assert hasWriteLock(); + getEditLog().logReassignLease(leaseHolder, src, newHolder); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 13fff598f46..fd1cbfcc571 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -401,17 +401,21 @@ public class LeaseManager { @Override public void run() { for(; shouldRunMonitor && fsnamesystem.isRunning(); ) { + boolean needSync = false; try { fsnamesystem.writeLockInterruptibly(); try { if (!fsnamesystem.isInSafeMode()) { - checkLeases(); + needSync = checkLeases(); } } finally { fsnamesystem.writeUnlock(); + // lease reassignments should to be sync'ed. + if (needSync) { + fsnamesystem.getEditLog().logSync(); + } } - Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL); } catch(InterruptedException ie) { if (LOG.isDebugEnabled()) { @@ -422,13 +426,16 @@ public class LeaseManager { } } - /** Check the leases beginning from the oldest. */ - private synchronized void checkLeases() { + /** Check the leases beginning from the oldest. + * @return true is sync is needed. + */ + private synchronized boolean checkLeases() { + boolean needSync = false; assert fsnamesystem.hasWriteLock(); for(; sortedLeases.size() > 0; ) { final Lease oldest = sortedLeases.first(); if (!oldest.expiredHardLimit()) { - return; + return needSync; } LOG.info(oldest + " has expired hard limit"); @@ -451,6 +458,10 @@ public class LeaseManager { LOG.debug("Started block recovery " + p + " lease " + oldest); } } + // If a lease recovery happened, we need to sync later. + if (!needSync && !completed) { + needSync = true; + } } catch (IOException e) { LOG.error("Cannot release the path " + p + " in the lease " + oldest, e); @@ -462,6 +473,7 @@ public class LeaseManager { removeLease(oldest, p); } } + return needSync; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 5df69765893..d2befc921a0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -446,7 +446,7 @@ public class TestEditLog { // Now ask to sync edit from B, which should sync both edits. doCallLogSync(threadB, editLog); - assertEquals("logSync from second thread should bump txid up to 2", + assertEquals("logSync from second thread should bump txid up to 3", 3, editLog.getSyncTxId()); // Now ask to sync edit from A, which was already batched in - thus