HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.

This commit is contained in:
He Xiaoqiao 2020-07-13 14:12:48 +08:00
parent 358934059f
commit e7e7a6d503
2 changed files with 118 additions and 0 deletions

View File

@ -3643,6 +3643,17 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
" internalReleaseLease: Committed blocks are minimally" +
" replicated, lease removed, file" + src + " closed.");
return true; // closed!
} else if (penultimateBlockMinStorage && lastBlock.getNumBytes() == 0) {
// HDFS-14498 - this is a file with a final block of zero bytes and was
// likely left in this state by a client which exited unexpectedly
pendingFile.removeLastBlock(lastBlock);
finalizeINodeFileUnderConstruction(src, pendingFile,
iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*" +
" internalReleaseLease: Committed last block is zero bytes with" +
" insufficient replicas. Final block removed, lease removed, file "
+ src + " closed.");
return true;
}
// Cannot close file right now, since some blocks
// are not yet minimally replicated.

View File

@ -24,15 +24,18 @@ import java.io.IOException;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.CryptoProtocolVersion;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
@ -43,6 +46,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Test;
@ -314,4 +318,107 @@ public class TestLeaseRecovery {
}
}
}
/**
 * HDFS-14498 - test lease can be recovered for a file where the final
 * block was never registered with the DNs, and hence the IBRs will never
 * be received. In this case the final block should be zero bytes and can
 * be removed.
 *
 * The test first checks that a second client is refused an append while
 * the original lease is still held, then recovers the lease explicitly and
 * verifies that the file can be appended to afterwards.
 *
 * @throws Exception if the cluster cannot be started or any client call
 *     fails unexpectedly.
 */
@Test
public void testLeaseRecoveryEmptyCommittedLastBlock() throws Exception {
  Configuration conf = new Configuration();
  DFSClient client = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    DistributedFileSystem dfs = cluster.getFileSystem();
    client =
        new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf);
    String file = "/test/f1";
    Path filePath = new Path(file);

    createCommittedNotCompleteFile(client, file);

    // Ensure a different client cannot append the file
    try {
      dfs.append(filePath);
      fail("Append to a file(lease is held by another client) should fail");
    } catch (RemoteException e) {
      assertTrue(e.getMessage().contains("file lease is currently owned"));
    }

    // Ensure the lease can be recovered on the first try
    assertTrue("Lease should be recovered on the first attempt",
        client.recoverLease(file));

    // Ensure the recovered file can now be written. try-with-resources
    // guarantees the stream is closed even when the write itself fails.
    try (FSDataOutputStream append = dfs.append(filePath)) {
      append.write("test".getBytes());
    }
  } finally {
    // Release resources in reverse order of creation. The nested finally
    // makes sure the cluster is still shut down if closing the client
    // throws, and the client is no longer skipped when shutdown throws.
    try {
      if (client != null) {
        client.close();
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
        cluster = null;
      }
    }
  }
}
/**
 * HDFS-14498 - similar to testLeaseRecoveryEmptyCommittedLastBlock except
 * we wait for the lease manager to recover the lease automatically.
 *
 * The test passes once the NameNode reports no lease holder for the file;
 * it polls every 100 ms and gives up after 10 seconds.
 *
 * @throws Exception if the cluster cannot be started, the lease is not
 *     released within the timeout, or any client call fails unexpectedly.
 */
@Test
public void testLeaseManagerRecoversEmptyCommittedLastBlock()
    throws Exception {
  Configuration conf = new Configuration();
  DFSClient client = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    client =
        new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf);
    String file = "/test/f1";

    createCommittedNotCompleteFile(client, file);
    waitLeaseRecovery(cluster);

    // The lease is gone once the NameNode has no holder for the path.
    GenericTestUtils.waitFor(
        () -> NameNodeAdapter.getLeaseHolderForPath(
            cluster.getNameNode(), file) == null,
        100, 10000);
  } finally {
    // Release resources in reverse order of creation. The nested finally
    // makes sure the cluster is still shut down if closing the client
    // throws, and the client is no longer skipped when shutdown throws.
    try {
      if (client != null) {
        client.close();
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
        cluster = null;
      }
    }
  }
}
/**
 * Leaves {@code file} in the state a client produces when it exits right
 * after allocating a block: the file is created, one block is added, and
 * a close is attempted without any data having been written.
 *
 * @param client client whose NameNode proxy is used to issue the calls.
 * @param file path of the file to create.
 * @throws IOException if any of the NameNode calls fails.
 */
private void createCommittedNotCompleteFile(DFSClient client, String file)
    throws IOException {
  final String leaseHolder = "test client";
  final ClientProtocol namenode = client.getNamenode();

  HdfsFileStatus created = namenode.create(
      file,
      new FsPermission("777"),
      leaseHolder,
      new EnumSetWritable<>(EnumSet.of(CreateFlag.CREATE)),
      true,
      (short) 1,
      1024 * 1024 * 128L,
      new CryptoProtocolVersion[0],
      null,
      null);
  final long fileId = created.getFileId();

  // Allocate a block for the file, but never open a pipeline to any DN.
  LocatedBlock allocated = namenode.addBlock(
      file, leaseHolder, null, new DatanodeInfo[0], fileId,
      new String[0], null);

  // Try to close the file with nothing written. The NN is expected to
  // refuse (return false): it waits for the allocated block to be reported
  // by the DNs via IBR, which cannot happen without a pipeline.
  boolean closed =
      namenode.complete(file, leaseHolder, allocated.getBlock(), fileId);
  assertEquals(false, closed);
}
}