HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, which leads to NN safemode. Contributed by Vinay.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1541773 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de9271686a
commit
735aae32e4
|
@ -550,6 +550,9 @@ Release 2.3.0 - UNRELEASED
|
|||
HDFS-5075. httpfs-config.sh calls out incorrect env script name
|
||||
(Timothy St. Clair via stevel)
|
||||
|
||||
HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold,
|
||||
which leads to NN safemode. (Vinay via jing9)
|
||||
|
||||
Release 2.2.1 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -593,7 +593,7 @@ public class FSEditLogLoader {
|
|||
fsNamesys.getSnapshotManager().deleteSnapshot(
|
||||
deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
|
||||
collectedBlocks, removedINodes);
|
||||
fsNamesys.removeBlocks(collectedBlocks);
|
||||
fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
|
||||
collectedBlocks.clear();
|
||||
fsNamesys.dir.removeFromInodeMap(removedINodes);
|
||||
removedINodes.clear();
|
||||
|
|
|
@ -3316,6 +3316,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
return;
|
||||
}
|
||||
|
||||
removeBlocksAndUpdateSafemodeTotal(blocks);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the blocks from blocksmap and updates the safemode blocks total
|
||||
*
|
||||
* @param blocks
|
||||
* An instance of {@link BlocksMapUpdateInfo} which contains a list
|
||||
* of blocks that need to be removed from blocksMap
|
||||
*/
|
||||
void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
|
||||
assert hasWriteLock();
|
||||
// In the case that we are a Standby tailing edits from the
|
||||
// active while in safe-mode, we need to track the total number
|
||||
// of blocks and safe blocks in the system.
|
||||
|
@ -3336,9 +3348,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
}
|
||||
if (trackBlockCounts) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" +
|
||||
"decreasing safeBlocks by " + numRemovedSafe +
|
||||
", totalBlocks by " + numRemovedComplete);
|
||||
LOG.debug("Adjusting safe-mode totals for deletion."
|
||||
+ "decreasing safeBlocks by " + numRemovedSafe
|
||||
+ ", totalBlocks by " + numRemovedComplete);
|
||||
}
|
||||
adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
|
|||
import org.apache.hadoop.hdfs.DFSUtil;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
|
@ -47,7 +48,10 @@ import org.apache.hadoop.hdfs.server.namenode.INode;
|
|||
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
|
||||
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota;
|
||||
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.apache.hadoop.hdfs.server.namenode.Quota;
|
||||
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
|
||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList;
|
||||
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
|
@ -949,4 +953,54 @@ public class TestSnapshotDeletion {
|
|||
psOut.close();
|
||||
out.close();
|
||||
}
|
||||
|
||||
/*
|
||||
* OP_DELETE_SNAPSHOT edits op was not decrementing the safemode threshold on
|
||||
* restart in HA mode. HDFS-5504
|
||||
*/
|
||||
/**
 * Regression test for HDFS-5504.
 *
 * Replaces the shared cluster with an HA mini cluster (simpleHATopology, one
 * DataNode), creates five files under a snapshottable directory, takes a
 * snapshot, deletes the files, rolls the edit log, deletes the snapshot, and
 * then restarts NameNode 0 and transitions it back to active.
 *
 * The test contains no explicit assertions: it fails only if one of the calls
 * throws or the 60-second timeout expires.
 * NOTE(review): presumably waitClusterUp() is what fails/times out when the
 * restarted NameNode cannot leave safe mode -- confirm.
 *
 * @throws Exception if any cluster or file system operation fails
 */
@Test(timeout = 60000)
|
||||
public void testHANNRestartAfterSnapshotDeletion() throws Exception {
|
||||
// Discard the existing file system handle and cluster; this test builds its
// own HA cluster below and stores it in the same fields.
hdfs.close();
|
||||
cluster.shutdown();
|
||||
// Fresh configuration and an HA-topology mini cluster with a single DataNode.
conf = new Configuration();
|
||||
cluster = new MiniDFSCluster.Builder(conf)
|
||||
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
|
||||
.build();
|
||||
// Make NameNode 0 the active one.
cluster.transitionToActive(0);
|
||||
// stop the standby namenode (index 1; NameNode 0 was just made active)
|
||||
NameNode snn = cluster.getNameNode(1);
|
||||
snn.stop();
|
||||
|
||||
// Re-create the client handle as a failover file system for the new cluster.
hdfs = (DistributedFileSystem) HATestUtil
|
||||
.configureFailoverFs(cluster, conf);
|
||||
// Make /dir snapshottable and populate /dir/sub with five files named 0..4.
Path dir = new Path("/dir");
|
||||
Path subDir = new Path(dir, "sub");
|
||||
hdfs.mkdirs(dir);
|
||||
hdfs.allowSnapshot(dir);
|
||||
// NOTE(review): the createFile arguments are presumably length = 100 bytes,
// replication = 1 and seed = 1024L -- confirm against DFSTestUtil.createFile.
for (int i = 0; i < 5; i++) {
|
||||
DFSTestUtil.createFile(hdfs, new Path(subDir, "" + i), 100, (short) 1,
|
||||
1024L);
|
||||
}
|
||||
|
||||
// take snapshot "s0" of /dir while the five files still exist
|
||||
hdfs.createSnapshot(dir, "s0");
|
||||
|
||||
// delete the subdir recursively
// NOTE(review): its blocks are presumably still retained by snapshot s0 at
// this point -- confirm.
|
||||
hdfs.delete(subDir, true);
|
||||
|
||||
// roll the edit log on NameNode 0 before the snapshot is deleted
// NOTE(review): presumably so that the delete-snapshot op lands in a new
// segment that is replayed on restart -- confirm.
|
||||
NameNode ann = cluster.getNameNode(0);
|
||||
ann.getRpcServer().rollEditLog();
|
||||
|
||||
// Delete the snapshot: this is the operation HDFS-5504 is about.
hdfs.deleteSnapshot(dir, "s0");
|
||||
// wait for the blocks deletion at namenode (fixed 2-second pause)
|
||||
Thread.sleep(2000);
|
||||
|
||||
// Abort NameNode 0's edit logs, restart it, and make it active again.
// NOTE(review): the 'false' argument presumably means "do not wait for the
// NameNode to become active" -- confirm against MiniDFSCluster.restartNameNode.
NameNodeAdapter.abortEditLogs(ann);
|
||||
cluster.restartNameNode(0, false);
|
||||
cluster.transitionToActive(0);
|
||||
|
||||
// wait till the cluster becomes active
|
||||
cluster.waitClusterUp();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue