HDFS-12369. Edit log corruption due to hard lease recovery of not-closed file which has snapshots.
(cherry picked from commit 3964b131f3f0131a663377685950d1b7bd3fe63d)
parent 7287932397
commit 9a3334ec0f
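In short: when the lease monitor's hard-limit recovery force-closed an open file whose inode had already been deleted but still survived in a snapshot, it left an edit-log record the NameNode could not replay on restart (the new test below verifies restart succeeds after the fix). The patch makes the lease monitor ask FSNamesystem#isFileDeleted first and drop such leases instead of recovering them. The sketch below is a self-contained toy model of that decision only, not the real HDFS classes; every type and method name in it is illustrative.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Toy model of the check added by this commit: before attempting
 * hard-limit lease recovery, the monitor asks the namesystem whether
 * the inode behind the lease still exists. If the file was deleted
 * (and only lingers in a snapshot), the lease is dropped rather than
 * force-closed. Names here are illustrative, not the real HDFS API.
 */
public class HardLimitLeaseCheckSketch {

  /** Stand-in for a lease held on an open file. */
  static final class Lease {
    final long inodeId;
    Lease(long inodeId) { this.inodeId = inodeId; }
  }

  /** Stand-in for the namesystem calls the patched code relies on. */
  interface Namesystem {
    boolean isFileDeleted(long inodeId);
    /** Hard-limit recovery; returns true if the file could be closed. */
    boolean internalReleaseLease(Lease lease);
  }

  /**
   * Mirrors the shape of LeaseManager#checkLeases after the patch:
   * leases on deleted files are removed before any recovery attempt,
   * so no close record is ever logged for a file that no longer exists.
   */
  static void checkLeases(List<Lease> leases, Namesystem fsn) {
    for (Iterator<Lease> it = leases.iterator(); it.hasNext(); ) {
      Lease lease = it.next();
      if (fsn.isFileDeleted(lease.inodeId)) {
        // INode referred by the lease was deleted; just drop the lease.
        it.remove();
        continue;
      }
      if (fsn.internalReleaseLease(lease)) {
        it.remove();
      }
    }
  }

  public static void main(String[] args) {
    List<Lease> leases = new ArrayList<>();
    leases.add(new Lease(1001L));   // file still present
    leases.add(new Lease(1002L));   // deleted, survives only in a snapshot
    Namesystem fsn = new Namesystem() {
      @Override public boolean isFileDeleted(long inodeId) { return inodeId == 1002L; }
      @Override public boolean internalReleaseLease(Lease lease) { return true; }
    };
    checkLeases(leases, fsn);
    System.out.println("remaining leases: " + leases.size()); // prints 0
  }
}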
@@ -4993,6 +4993,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   }
 
   boolean isFileDeleted(INodeFile file) {
+    assert hasReadLock();
     // Not in the inodeMap or in the snapshot but marked deleted.
     if (dir.getInode(file.getId()) == null) {
       return true;

@@ -557,6 +557,12 @@ public class LeaseManager {
           if (!p.startsWith("/")) {
             throw new IOException("Invalid path in the lease " + p);
           }
+          final INodeFile lastINode = iip.getLastINode().asFile();
+          if (fsnamesystem.isFileDeleted(lastINode)) {
+            // INode referred by the lease could have been deleted.
+            removeLease(lastINode.getId());
+            continue;
+          }
           boolean completed = false;
           try {
             completed = fsnamesystem.internalReleaseLease(

@@ -20,11 +20,13 @@ package org.apache.hadoop.hdfs.server.namenode;
 import java.io.FileNotFoundException;
 import java.util.AbstractMap;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -49,16 +51,21 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils;
+import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.Node;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.junit.Assert;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 import org.mockito.Mockito;
 import org.mockito.internal.util.reflection.Whitebox;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
 
 /**
  * Test race between delete and other operations. For now only addBlock()
@@ -71,6 +78,9 @@ public class TestDeleteRace {
   private static final Configuration conf = new HdfsConfiguration();
   private MiniDFSCluster cluster;
 
+  @Rule
+  public Timeout timeout = new Timeout(60000 * 3);
+
   @Test
   public void testDeleteAddBlockRace() throws Exception {
     testDeleteAddBlockRace(false);

@@ -358,4 +368,78 @@
       throws Exception {
     testDeleteAndCommitBlockSynchronizationRace(true);
   }
+
+
+  /**
+   * Test the sequence of deleting a file that has snapshot,
+   * and lease manager's hard limit recovery.
+   */
+  @Test
+  public void testDeleteAndLeaseRecoveryHardLimitSnapshot() throws Exception {
+    final Path rootPath = new Path("/");
+    final Configuration config = new Configuration();
+    // Disable permissions so that another user can recover the lease.
+    config.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
+    config.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
+    FSDataOutputStream stm = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(config).numDataNodes(3).build();
+      cluster.waitActive();
+
+      final DistributedFileSystem fs = cluster.getFileSystem();
+      final Path testPath = new Path("/testfile");
+      stm = fs.create(testPath);
+      LOG.info("test on " + testPath);
+
+      // write a half block
+      AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
+      stm.hflush();
+
+      // create a snapshot, so delete does not release the file's inode.
+      SnapshotTestHelper.createSnapshot(fs, rootPath, "snap");
+
+      // delete the file without closing it.
+      fs.delete(testPath, false);
+
+      // write enough bytes to trigger an addBlock, which would fail in
+      // the streamer.
+      AppendTestUtil.write(stm, 0, BLOCK_SIZE);
+
+      // Mock a scenario that the lease reached hard limit.
+      final LeaseManager lm = (LeaseManager) Whitebox
+          .getInternalState(cluster.getNameNode().getNamesystem(),
+              "leaseManager");
+      final TreeSet<Lease> leases =
+          (TreeSet<Lease>) Whitebox.getInternalState(lm, "sortedLeases");
+      final TreeSet<Lease> spyLeases = new TreeSet<>(new Comparator<Lease>() {
+        @Override
+        public int compare(Lease o1, Lease o2) {
+          return Long.signum(o1.getLastUpdate() - o2.getLastUpdate());
+        }
+      });
+      while (!leases.isEmpty()) {
+        final Lease lease = leases.first();
+        final Lease spyLease = Mockito.spy(lease);
+        Mockito.doReturn(true).when(spyLease).expiredHardLimit();
+        spyLeases.add(spyLease);
+        leases.remove(lease);
+      }
+      Whitebox.setInternalState(lm, "sortedLeases", spyLeases);
+
+      // wait for lease manager's background 'Monitor' class to check leases.
+      Thread.sleep(2 * conf.getLong(DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
+          DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT));
+
+      LOG.info("Now check we can restart");
+      cluster.restartNameNodes();
+      LOG.info("Restart finished");
+    } finally {
+      if (stm != null) {
+        IOUtils.closeStream(stm);
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }