From 685b81f44731d063eb43a5fa6ec4a3488ee5e3ba Mon Sep 17 00:00:00 2001 From: He Xiaoqiao Date: Fri, 17 Jul 2020 00:17:17 +0800 Subject: [PATCH] HDFS-14498. LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell. --- .../hdfs/server/namenode/FSNamesystem.java | 13 ++ .../apache/hadoop/hdfs/TestLeaseRecovery.java | 111 ++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 11ac3fc760b..43d4797d96d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3155,6 +3155,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, * false if block recovery has been initiated. Since the lease owner * has been changed and logged, caller should call logSync(). */ + @SuppressWarnings("checkstyle:methodlength") boolean internalReleaseLease(Lease lease, String src, INodesInPath iip, String recoveryLeaseHolder) throws IOException { LOG.info("Recovering " + lease + ", src=" + src); @@ -3223,6 +3224,18 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, " internalReleaseLease: Committed blocks are minimally" + " replicated, lease removed, file" + src + " closed."); return true; // closed! + } else if (penultimateBlockMinReplication + && lastBlock.getNumBytes() == 0) { + // HDFS-14498 - this is a file with a final block of zero bytes and was + // likely left in this state by a client which exited unexpectedly + pendingFile.removeLastBlock(lastBlock); + finalizeINodeFileUnderConstruction(src, pendingFile, + iip.getLatestSnapshotId(), false); + NameNode.stateChangeLog.warn("BLOCK*" + + " internalReleaseLease: Committed last block is zero bytes with" + + " insufficient replicas. Final block removed, lease removed, file " + + src + " closed."); + return true; } // Cannot close file right now, since some blocks // are not yet minimally replicated. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java index c82b47cec94..996d601f536 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java @@ -23,16 +23,20 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.util.EnumSet; +import com.google.common.base.Supplier; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.DataNode; @@ -43,6 +47,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; import org.junit.Test; @@ -314,4 +319,110 @@ public class TestLeaseRecovery { } } } + + /** + * HDFS-14498 - test lease can be recovered for a file where the final + * block was never registered with the DNs, and hence the IBRs will never + * be received. In this case the final block should be zero bytes and can + * be removed. + */ + @Test + public void testLeaseRecoveryEmptyCommittedLastBlock() throws Exception { + Configuration conf = new Configuration(); + DFSClient client = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + DistributedFileSystem dfs = cluster.getFileSystem(); + client = + new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf); + String file = "/test/f1"; + Path filePath = new Path(file); + + createCommittedNotCompleteFile(client, file); + + // Ensure a different client cannot append the file + try { + dfs.append(filePath); + fail("Append to a file(lease is held by another client) should fail"); + } catch (RemoteException e) { + assertTrue(e.getMessage().contains("file lease is currently owned")); + } + + // Ensure the lease can be recovered on the first try + boolean recovered = client.recoverLease(file); + assertEquals(true, recovered); + + // Ensure the recovered file can now be written + FSDataOutputStream append = dfs.append(filePath); + append.write("test".getBytes()); + append.close(); + } finally { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + if (client != null) { + client.close(); + } + } + } + + /** + * HDFS-14498 - similar to testLeaseRecoveryEmptyCommittedLastBlock except + * we wait for the lease manager to recover the lease automatically. + */ + @Test + public void testLeaseManagerRecoversEmptyCommittedLastBlock() + throws Exception { + Configuration conf = new Configuration(); + DFSClient client = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + client = + new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf); + final String file = "/test/f1"; + + createCommittedNotCompleteFile(client, file); + waitLeaseRecovery(cluster); + + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + String holder = NameNodeAdapter + .getLeaseHolderForPath(cluster.getNameNode(), file); + return holder == null; + } + }, 100, 10000); + + } finally { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + if (client != null) { + client.close(); + } + } + } + + private void createCommittedNotCompleteFile(DFSClient client, String file) + throws IOException { + HdfsFileStatus stat = client.getNamenode() + .create(file, new FsPermission("777"), "test client", + new EnumSetWritable(EnumSet.of(CreateFlag.CREATE)), + true, (short) 1, 1024 * 1024 * 128L, + new CryptoProtocolVersion[0]); + // Add a block to the file + LocatedBlock blk = client.getNamenode() + .addBlock(file, "test client", null, + new DatanodeInfo[0], stat.getFileId(), new String[0], null); + // Without writing anything to the file, or setting up the DN pipeline + // attempt to close the file. This will fail (return false) as the NN will + // be expecting the registered block to be reported from the DNs via IBR, + // but that will never happen, as the pipeline was never established + boolean closed = client.getNamenode().complete( + file, "test client", blk.getBlock(), stat.getFileId()); + assertEquals(false, closed); + } + }