From 19ae0faacc5b1e8ce05f11c555922c0c025dcf3b Mon Sep 17 00:00:00 2001 From: bshashikant Date: Sat, 6 Feb 2021 21:56:12 +0530 Subject: [PATCH] HDFS-15820. Ensure snapshot root trash provisioning happens only post safe mode exit (#2682) --- .../blockmanagement/BlockManagerSafeMode.java | 2 +- .../hdfs/server/namenode/FSNamesystem.java | 56 +++++++++++-------- .../hadoop/hdfs/server/namenode/NameNode.java | 3 - .../hdfs/server/namenode/Namesystem.java | 10 +++- .../hdfs/TestDistributedFileSystem.java | 17 +++++- 5 files changed, 58 insertions(+), 30 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java index 0e8937548f3..d731143b172 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java @@ -425,7 +425,7 @@ class BlockManagerSafeMode { BlockManagerSafeMode.STEP_AWAITING_REPORTED_BLOCKS); prog.endPhase(Phase.SAFEMODE); } - + namesystem.checkAndProvisionSnapshotTrashRoots(); return true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 5bb31b8d6d1..9c3dd25f12e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -124,7 +124,8 @@ import org.apache.hadoop.hdfs.server.common.ECTopologyVerifier; import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.ipc.ObserverRetryOnActiveException; -import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.*; + import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.util.Time.monotonicNow; import static org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics.TOPMETRICS_METRICS_SOURCE_NAME; @@ -329,11 +330,6 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.delegation.DelegationKey; -import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.DataChecksum; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.VersionInfo; import org.apache.log4j.Logger; import org.apache.log4j.Appender; import org.apache.log4j.AsyncAppender; @@ -8531,25 +8527,37 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, * Check if snapshot roots are created for all existing snapshottable * directories. Create them if not. */ - void checkAndProvisionSnapshotTrashRoots() throws IOException { - SnapshottableDirectoryStatus[] dirStatusList = getSnapshottableDirListing(); - if (dirStatusList == null) { - return; - } - for (SnapshottableDirectoryStatus dirStatus : dirStatusList) { - String currDir = dirStatus.getFullPath().toString(); - if (!currDir.endsWith(Path.SEPARATOR)) { - currDir += Path.SEPARATOR; - } - String trashPath = currDir + FileSystem.TRASH_PREFIX; - HdfsFileStatus fileStatus = getFileInfo(trashPath, false, false, false); - if (fileStatus == null) { - LOG.info("Trash doesn't exist for snapshottable directory {}. " - + "Creating trash at {}", currDir, trashPath); - PermissionStatus permissionStatus = new PermissionStatus(getRemoteUser() - .getShortUserName(), null, SHARED_TRASH_PERMISSION); - mkdirs(trashPath, permissionStatus, false); + @Override + public void checkAndProvisionSnapshotTrashRoots() { + if (isSnapshotTrashRootEnabled) { + try { + SnapshottableDirectoryStatus[] dirStatusList = + getSnapshottableDirListing(); + if (dirStatusList == null) { + return; + } + for (SnapshottableDirectoryStatus dirStatus : dirStatusList) { + String currDir = dirStatus.getFullPath().toString(); + if (!currDir.endsWith(Path.SEPARATOR)) { + currDir += Path.SEPARATOR; + } + String trashPath = currDir + FileSystem.TRASH_PREFIX; + HdfsFileStatus fileStatus = getFileInfo(trashPath, false, false, false); + if (fileStatus == null) { + LOG.info("Trash doesn't exist for snapshottable directory {}. " + "Creating trash at {}", currDir, trashPath); + PermissionStatus permissionStatus = + new PermissionStatus(getRemoteUser().getShortUserName(), null, + SHARED_TRASH_PERMISSION); + mkdirs(trashPath, permissionStatus, false); + } + } + } catch (IOException e) { + final String msg = + "Could not provision Trash directory for existing " + + "snapshottable directories. Exiting Namenode."; + ExitUtil.terminate(1, msg); } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index c6ea823e6cc..3225bab32a0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -2011,9 +2011,6 @@ public class NameNode extends ReconfigurableBase implements public void startActiveServices() throws IOException { try { namesystem.startActiveServices(); - if (namesystem.isSnapshotTrashRootEnabled()) { - namesystem.checkAndProvisionSnapshotTrashRoots(); - } startTrashEmptier(getConf()); } catch (Throwable t) { doImmediateShutdown(t); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java index 2a525870064..fe1e3e067fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java @@ -27,7 +27,9 @@ import org.apache.hadoop.hdfs.util.RwLock; /** Namesystem operations. */ @InterfaceAudience.Private public interface Namesystem extends RwLock, SafeMode { - /** Is this name system running? */ + /** + * Is this name system running? + */ boolean isRunning(); BlockCollection getBlockCollection(long id); @@ -55,4 +57,10 @@ public interface Namesystem extends RwLock, SafeMode { * @throws IOException */ void removeXattr(long id, String xattrName) throws IOException; + + /** + * Check if snapshot roots are created for all existing snapshottable + * directories. Create them if not. + */ + void checkAndProvisionSnapshotTrashRoots(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index e0d1f92e750..f7dcaef0395 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -2524,7 +2524,7 @@ public class TestDistributedFileSystem { MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); try { - final DistributedFileSystem dfs = cluster.getFileSystem(); + DistributedFileSystem dfs = cluster.getFileSystem(); final Path testDir = new Path("/disallowss/test2/"); final Path file0path = new Path(testDir, "file-0"); dfs.create(file0path).close(); @@ -2535,7 +2535,20 @@ public class TestDistributedFileSystem { // Set dfs.namenode.snapshot.trashroot.enabled=true conf.setBoolean("dfs.namenode.snapshot.trashroot.enabled", true); cluster.setNameNodeConf(0, conf); + cluster.shutdown(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1); cluster.restartNameNode(0); + dfs = cluster.getFileSystem(); + assertTrue(cluster.getNameNode().isInSafeMode()); + // Check .Trash existence, won't be created now + assertFalse(dfs.exists(trashRoot)); + // Start a datanode + cluster.startDataNodes(conf, 1, true, null, null); + // Wait long enough for safemode check to retire + try { + Thread.sleep(1000); + } catch (InterruptedException ignored) {} // Check .Trash existence, should be created now assertTrue(dfs.exists(trashRoot)); // Check permission @@ -2553,4 +2566,6 @@ public class TestDistributedFileSystem { } } } + + }