HDFS-15590. namenode fails to start when ordered snapshot deletion feature is disabled (#2326)

This commit is contained in:
bshashikant 2020-09-24 14:00:41 +05:30 committed by GitHub
parent c3a90dd918
commit 368f2f637e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 46 additions and 5 deletions

View File

@ -241,6 +241,24 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
throws SnapshotException { throws SnapshotException {
final int i = searchSnapshot(DFSUtil.string2Bytes(snapshotName)); final int i = searchSnapshot(DFSUtil.string2Bytes(snapshotName));
if (i < 0) { if (i < 0) {
// Consider the following sequence with snapshots S1 and S2:
// 1. The ordered snapshot deletion feature is turned on.
// 2. S2 is deleted, creating an edit log entry for the S2 deletion.
// 3. S1 is deleted.
// 4. S2 gets deleted by the snapshot GC thread, creating a second edit log
//    record for the S2 deletion.
// 5. The ordered snapshot deletion feature is disabled.
// 6. The Namenode is restarted.
// In this case, during edit log replay, the actual deletion of S2 happens
// when the first edit log record for the S2 deletion is replayed. Replaying
// the second edit log record for the S2 deletion then fails because the
// snapshot no longer exists, thereby failing the Namenode start.
// The idea here is to check, during edit log replay, whether a snapshot is
// not found while ordered snapshot deletion is off and, if so, ignore the
// error.
if (!snapshotManager.isSnapshotDeletionOrdered() &&
!snapshotManager.isImageLoaded()) {
return null;
}
throw new SnapshotException("Cannot delete snapshot " + snapshotName throw new SnapshotException("Cannot delete snapshot " + snapshotName
+ " from path " + snapshotRoot.getFullPathName() + " from path " + snapshotRoot.getFullPathName()
+ ": the snapshot does not exist."); + ": the snapshot does not exist.");

View File

@ -479,10 +479,10 @@ public class SnapshotManager implements SnapshotStatsMXBean {
void checkSnapshotLimit(int limit, int snapshotCount, String type) void checkSnapshotLimit(int limit, int snapshotCount, String type)
throws SnapshotException { throws SnapshotException {
if (snapshotCount >= limit) { if (snapshotCount >= limit) {
String msg = "there are already " + (snapshotCount + 1) String msg = "there are already " + snapshotCount
+ " snapshot(s) and the " + type + " snapshot limit is " + " snapshot(s) and the " + type + " snapshot limit is "
+ limit; + limit;
if (fsdir.isImageLoaded()) { if (isImageLoaded()) {
// We have reached the maximum snapshot limit // We have reached the maximum snapshot limit
throw new SnapshotException( throw new SnapshotException(
"Failed to create snapshot: " + msg); "Failed to create snapshot: " + msg);
@ -493,6 +493,9 @@ public class SnapshotManager implements SnapshotStatsMXBean {
} }
} }
/**
 * Reports the image-loaded state by delegating to
 * {@code fsdir.isImageLoaded()}.
 *
 * @return the value returned by {@code fsdir.isImageLoaded()}
 */
boolean isImageLoaded() {
return fsdir.isImageLoaded();
}
/** /**
* Delete a snapshot for a snapshottable directory * Delete a snapshot for a snapshottable directory
* @param snapshotName Name of the snapshot to be deleted * @param snapshotName Name of the snapshot to be deleted

View File

@ -128,8 +128,8 @@ public class TestSnapshotCommands {
DFSTestUtil.FsShellRun("-createSnapshot /sub3 sn2", 0, DFSTestUtil.FsShellRun("-createSnapshot /sub3 sn2", 0,
"Created snapshot /sub3/.snapshot/sn2", conf); "Created snapshot /sub3/.snapshot/sn2", conf);
DFSTestUtil.FsShellRun("-createSnapshot /sub3 sn3", 1, DFSTestUtil.FsShellRun("-createSnapshot /sub3 sn3", 1,
"Failed to add snapshot: there are already 3 snapshot(s) and " "Failed to create snapshot: there are already 3 snapshot(s) and "
+ "the max snapshot limit is 3", conf); + "the per directory snapshot limit is 3", conf);
} }
@Test @Test

View File

@ -195,6 +195,26 @@ public class TestOrderedSnapshotDeletion {
assertXAttrSet("s1", hdfs, null); assertXAttrSet("s1", hdfs, null);
} }
/**
 * Restarts the NameNodes after ordered snapshot deletion has been switched
 * off in the NameNode configuration.
 *
 * The test creates snapshots s0 and s1 on the snapshottable directory,
 * invokes {@code assertXAttrSet} for s1 twice, sets
 * {@code DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED} to false and then restarts
 * the NameNodes. It passes when the restart completes without throwing.
 *
 * @throws Exception if a file system operation or the restart fails
 */
@Test(timeout = 60000)
public void testOrderedDeletionWithRestart() throws Exception {
  DistributedFileSystem hdfs = cluster.getFileSystem();
  hdfs.mkdirs(snapshottableDir);
  hdfs.allowSnapshot(snapshottableDir);

  final Path sub0 = new Path(snapshottableDir, "sub0");
  hdfs.mkdirs(sub0);
  hdfs.createSnapshot(snapshottableDir, "s0");

  final Path sub1 = new Path(snapshottableDir, "sub1");
  hdfs.mkdirs(sub1);
  hdfs.createSnapshot(snapshottableDir, "s1");

  // NOTE(review): the two identical calls look intentional. Presumably each
  // one requests deletion of s1 while s0 still exists, so the deletion is
  // recorded more than once. Confirm against assertXAttrSet before
  // collapsing them.
  assertXAttrSet("s1", hdfs, null);
  assertXAttrSet("s1", hdfs, null);

  // Turn ordered deletion off on the running NameNode's configuration, then
  // restart.
  cluster.getNameNode().getConf().
      setBoolean(DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED, false);
  cluster.restartNameNodes();
}
@Test(timeout = 60000) @Test(timeout = 60000)
public void testSnapshotXattrWithDisablingXattr() throws Exception { public void testSnapshotXattrWithDisablingXattr() throws Exception {
DistributedFileSystem hdfs = cluster.getFileSystem(); DistributedFileSystem hdfs = cluster.getFileSystem();