From 71d0e4fca79d4305fe35e44b614703d3b9883017 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Mon, 15 Aug 2016 17:40:07 -0500 Subject: [PATCH] HDFS-10763. Open files can leak permanently due to inconsistent lease update. Contributed by Kihwal Lee. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../server/namenode/FSImageFormatPBINode.java | 12 ++++-- .../hdfs/server/namenode/FSNamesystem.java | 3 +- .../server/namenode/TestLeaseManager.java | 43 +++++++++++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0ff7bba0e11..a4fcf86b130 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -255,6 +255,9 @@ Release 2.7.3 - UNRELEASED HDFS-9696. Garbage snapshot records linger forever. (kihwal) + HDFS-10763. Open files can leak permanently due to inconsistent lease + update. (kihwal) + Release 2.7.2 - 2016-01-25 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index dfd150ad33e..7206b2f6c84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -220,11 +220,13 @@ public final class FSImageFormatPBINode { private final FSDirectory dir; private final FSNamesystem fsn; private final FSImageFormatProtobuf.Loader parent; + private final List ucFiles; Loader(FSNamesystem fsn, final FSImageFormatProtobuf.Loader parent) { this.fsn = fsn; this.dir = fsn.dir; this.parent = parent; + this.ucFiles = new ArrayList(); } void loadINodeDirectorySection(InputStream in) throws IOException { @@ -268,17 +270,20 @@ public final class FSImageFormatPBINode { * Load the under-construction files section, and update the lease map */ void loadFilesUnderConstructionSection(InputStream in) throws IOException { + // This section is consumed, but not actually used for restoring leases. while (true) { FileUnderConstructionEntry entry = FileUnderConstructionEntry .parseDelimitedFrom(in); if (entry == null) { break; } - // update the lease manager - INodeFile file = dir.getInode(entry.getInodeId()).asFile(); + } + + // Add a lease for each and every file under construction. + for (INodeFile file : ucFiles) { FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature(); Preconditions.checkState(uc != null); // file must be under-construction - fsn.leaseManager.addLease(uc.getClientName(), entry.getFullPath()); + fsn.leaseManager.addLease(uc.getClientName(), file.getFullPathName()); } } @@ -346,6 +351,7 @@ public final class FSImageFormatPBINode { // under-construction information if (f.hasFileUC()) { + ucFiles.add(file); INodeSection.FileUnderConstructionFeature uc = f.getFileUC(); file.toUnderConstruction(uc.getClientName(), uc.getClientMachine()); if (blocks.length > 0) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index bef296ae84c..3887ac37dbf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4205,7 +4205,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, throw new IOException("Cannot finalize file " + src + " because it is not under construction"); } - leaseManager.removeLease(uc.getClientName(), src); pendingFile.recordModification(latestSnapshot); @@ -4214,6 +4213,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, // since we just remove the uc feature from pendingFile pendingFile.toCompleteFile(now()); + leaseManager.removeLease(uc.getClientName(), src); + waitForLoadingFSImage(); // close file and persist block allocations for this file closeFile(src, pendingFile); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java index 2f114a78c6a..570a5a5c3eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java @@ -21,9 +21,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.junit.Test; import org.mockito.Mockito; @@ -81,4 +85,43 @@ public class TestLeaseManager { //Initiate a call to checkLease. This should exit within the test timeout lm.checkLeases(); } + + /** + * Make sure the lease is restored even if only the inode has the record. + */ + @Test + public void testLeaseRestorationOnRestart() throws Exception { + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(new HdfsConfiguration()) + .numDataNodes(1).build(); + DistributedFileSystem dfs = cluster.getFileSystem(); + + // Create an empty file + String path = "/testLeaseRestorationOnRestart"; + FSDataOutputStream out = dfs.create(new Path(path)); + + // Remove the lease from the lease manager, but leave it in the inode. + FSDirectory dir = cluster.getNamesystem().getFSDirectory(); + INodeFile file = dir.getINode(path).asFile(); + cluster.getNamesystem().leaseManager.removeLease( + file.getFileUnderConstructionFeature().getClientName(), path); + + // Save a fsimage. + dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + cluster.getNameNodeRpc().saveNamespace(); + dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + // Restart the namenode. + cluster.restartNameNode(true); + + // Check whether the lease manager has the lease + assertNotNull("Lease should exist", + cluster.getNamesystem().leaseManager.getLeaseByPath(path)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } }