From 86c92227fc56b6e06d879d250728e8dc8cbe98fe Mon Sep 17 00:00:00 2001
From: Arpit Agarwal
Date: Fri, 23 Oct 2015 17:27:45 -0700
Subject: [PATCH] HDFS-4015. Safemode should count and report orphaned blocks.
 (Contributed by Anu Engineer)

---
 .../org/apache/hadoop/hdfs/DFSClient.java          |  10 +
 .../hadoop/hdfs/DistributedFileSystem.java         |  11 ++
 .../hadoop/hdfs/protocol/ClientProtocol.java       |   5 +-
 .../hadoop/hdfs/protocol/HdfsConstants.java        |   2 +-
 .../hadoop/hdfs/protocolPB/PBHelperClient.java     |  14 +-
 .../main/proto/ClientNamenodeProtocol.proto        |   2 +
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |   3 +
 .../server/blockmanagement/BlockManager.java       |  61 +++++-
 .../blockmanagement/HeartbeatManager.java          |   1 +
 .../hdfs/server/namenode/FSNamesystem.java         |  73 ++++++-
 .../hadoop/hdfs/server/namenode/NameNode.java      |   7 +-
 .../server/namenode/NameNodeStatusMXBean.java      |   6 +
 .../apache/hadoop/hdfs/tools/DFSAdmin.java         |  18 +-
 .../src/site/markdown/HDFSCommands.md              |   6 +-
 .../TestNameNodeMetadataConsistency.java           | 186 ++++++++++++++++++
 .../src/test/resources/testHDFSConf.xml            |  20 +-
 16 files changed, 408 insertions(+), 17 deletions(-)
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetadataConsistency.java

diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 08f25f5d0b4..ca0538ef0f8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -2005,6 +2005,16 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
     return callGetStats()[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX];
   }

+  /**
+   * Returns number of bytes that reside in Blocks with future generation
+   * stamps.
+   * @return Bytes in Blocks with future generation stamps.
+   * @throws IOException
+   */
+  public long getBytesInFutureBlocks() throws IOException {
+    return callGetStats()[ClientProtocol.GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX];
+  }
+
   /**
    * @return a list in which each entry describes a corrupt file/block
    * @throws IOException
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 39cc42b26ca..f4ca2657d4d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -536,6 +536,17 @@ public class DistributedFileSystem extends FileSystem {
     return Arrays.asList(dfs.getStoragePolicies());
   }

+  /**
+   * Returns number of bytes within blocks with future generation stamp. These
+   * are bytes that will be potentially deleted if we forceExit from safe mode.
+   *
+   * @return number of bytes.
+   */
+  public long getBytesWithFutureGenerationStamps() throws IOException {
+    statistics.incrementReadOps(1);
+    return dfs.getBytesInFutureBlocks();
+  }
+
   /**
    * Deprecated. Prefer {@link FileSystem#getAllStoragePolicies()}
    * @throws IOException
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
index 2a40047e879..6ebb01d8d04 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
@@ -712,10 +712,12 @@ public interface ClientProtocol {
   int GET_STATS_CORRUPT_BLOCKS_IDX = 4;
   int GET_STATS_MISSING_BLOCKS_IDX = 5;
   int GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX = 6;
+  int GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX = 7;
+  int STATS_ARRAY_LENGTH = 8;

   /**
    * Get a set of statistics about the filesystem.
-   * Right now, only seven values are returned.
+   * Right now, only eight values are returned.
    *
    * Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of
    * actual numbers to index into the array.
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java
index e744b858f64..ba3e5cf8507 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java
@@ -118,7 +118,7 @@ public final class HdfsConstants {

   // SafeMode actions
   public enum SafeModeAction {
-    SAFEMODE_LEAVE, SAFEMODE_ENTER, SAFEMODE_GET
+    SAFEMODE_LEAVE, SAFEMODE_ENTER, SAFEMODE_GET, SAFEMODE_FORCE_EXIT
   }

   public enum RollingUpgradeAction {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
index 496f06f708a..7ad9013c8da 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
@@ -1508,6 +1508,8 @@ public class PBHelperClient {
       return SafeModeActionProto.SAFEMODE_ENTER;
     case SAFEMODE_GET:
       return SafeModeActionProto.SAFEMODE_GET;
+    case SAFEMODE_FORCE_EXIT:
+      return SafeModeActionProto.SAFEMODE_FORCE_EXIT;
     default:
       throw new IllegalArgumentException("Unexpected SafeModeAction :" + a);
     }
@@ -1522,7 +1524,7 @@ public class PBHelperClient {
   }

   public static long[] convert(GetFsStatsResponseProto res) {
-    long[] result = new long[7];
+    long[] result = new long[ClientProtocol.STATS_ARRAY_LENGTH];
     result[ClientProtocol.GET_STATS_CAPACITY_IDX] = res.getCapacity();
     result[ClientProtocol.GET_STATS_USED_IDX] = res.getUsed();
     result[ClientProtocol.GET_STATS_REMAINING_IDX] = res.getRemaining();
@@ -1534,6 +1536,8 @@ public class PBHelperClient {
         res.getMissingBlocks();
     result[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] =
         res.getMissingReplOneBlocks();
+    result[ClientProtocol.GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX] =
+        res.hasBlocksInFuture() ? res.getBlocksInFuture() : 0;
     return result;
   }

@@ -1897,6 +1901,12 @@ public class PBHelperClient {
         ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX + 1)
       result.setMissingReplOneBlocks(
           fsStats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX]);
+
+    if (fsStats.length >=
+        ClientProtocol.GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX + 1) {
+      result.setBlocksInFuture(
+          fsStats[ClientProtocol.GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX]);
+    }
     return result.build();
   }

@@ -1920,6 +1930,8 @@ public class PBHelperClient {
       return SafeModeAction.SAFEMODE_ENTER;
     case SAFEMODE_GET:
       return SafeModeAction.SAFEMODE_GET;
+    case SAFEMODE_FORCE_EXIT:
+      return SafeModeAction.SAFEMODE_FORCE_EXIT;
     default:
       throw new IllegalArgumentException("Unexpected SafeModeAction :" + a);
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto
index 6a140ebe7ef..865f9462a13 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto
@@ -306,6 +306,7 @@ message GetFsStatsResponseProto {
   required uint64 corrupt_blocks = 5;
   required uint64 missing_blocks = 6;
   optional uint64 missing_repl_one_blocks = 7;
+  optional uint64 blocks_in_future = 8;
 }

 enum DatanodeReportTypeProto {  // type of the datanode report
@@ -348,6 +349,7 @@ enum SafeModeActionProto {
   SAFEMODE_LEAVE = 1;
   SAFEMODE_ENTER = 2;
   SAFEMODE_GET = 3;
+  SAFEMODE_FORCE_EXIT = 4;
 }

 message SetSafeModeRequestProto {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f5f13cf8aa4..e4dc598177b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1566,6 +1566,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-8808. dfs.image.transfer.bandwidthPerSec should not apply to
     -bootstrapStandby (zhz)

+    HDFS-4015. Safemode should count and report orphaned blocks.
+    (Anu Engineer via Arpit Agarwal)
+
   OPTIMIZATIONS

     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index a312936e0a0..5f55ecee79c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
 import org.apache.hadoop.hdfs.server.blockmanagement.CorruptReplicasMap.Reason;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo.AddBlockResult;
 import org.apache.hadoop.hdfs.server.blockmanagement.PendingDataNodeMessages.ReportedBlockInfo;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
 import org.apache.hadoop.hdfs.server.namenode.CachedBlock;
@@ -292,6 +293,12 @@ public class BlockManager implements BlockStatsMXBean {
   /** Check whether there are any non-EC blocks using StripedID */
   private boolean hasNonEcBlockUsingStripedID = false;

+  /** Keeps track of how many bytes are in Future Generation blocks. */
+  private AtomicLong numberOfBytesInFutureBlocks;
+
+  /** Reports if Name node was started with Rollback option. */
+  private boolean inRollBack = false;
+
   public BlockManager(final Namesystem namesystem, final Configuration conf)
     throws IOException {
     this.namesystem = namesystem;
@@ -370,6 +377,8 @@ public class BlockManager implements BlockStatsMXBean {
         DFSConfigKeys.DFS_BLOCK_MISREPLICATION_PROCESSING_LIMIT,
         DFSConfigKeys.DFS_BLOCK_MISREPLICATION_PROCESSING_LIMIT_DEFAULT);
     this.blockReportLeaseManager = new BlockReportLeaseManager(conf);
+    this.numberOfBytesInFutureBlocks = new AtomicLong();
+    this.inRollBack = isInRollBackMode(NameNode.getStartupOption(conf));

     LOG.info("defaultReplication = " + defaultReplication);
     LOG.info("maxReplication = " + maxReplication);
@@ -2274,9 +2283,19 @@ public class BlockManager implements BlockStatsMXBean {
     }

     BlockInfo storedBlock = getStoredBlock(iblk);
-    // If block does not belong to any file, we are done.
-    if (storedBlock == null) continue;
-
+
+    // If block does not belong to any file, we check if it violates
+    // an integrity assumption of Name node
+    if (storedBlock == null) {
+      if (namesystem.isInStartupSafeMode()
+          && !shouldPostponeBlocksFromFuture
+          && !inRollBack
+          && namesystem.isGenStampInFuture(iblk)) {
+        numberOfBytesInFutureBlocks.addAndGet(iblk.getBytesOnDisk());
+      }
+      continue;
+    }
+
     // If block is corrupt, mark it and continue to next block.
     BlockUCState ucState = storedBlock.getBlockUCState();
     BlockToMarkCorrupt c = checkReplicaCorrupt(
@@ -4154,4 +4173,40 @@ public class BlockManager implements BlockStatsMXBean {
       return false;
     return haContext.getState().shouldPopulateReplQueues();
   }
+
+  /**
+   * Returns the number of bytes that reside in blocks with Generation Stamps
+   * greater than generation stamp known to Namenode.
+   *
+   * @return Bytes in future
+   */
+  public long getBytesInFuture() {
+    return numberOfBytesInFutureBlocks.get();
+  }
+
+  /**
+   * Clears the bytes in future counter.
+   */
+  public void clearBytesInFuture() {
+    numberOfBytesInFutureBlocks.set(0);
+  }
+
+  /**
+   * Returns true if Namenode was started with a RollBack option.
+   *
+   * @param option - StartupOption
+   * @return boolean
+   */
+  private boolean isInRollBackMode(HdfsServerConstants.StartupOption option) {
+    if (option == HdfsServerConstants.StartupOption.ROLLBACK) {
+      return true;
+    }
+    if ((option == HdfsServerConstants.StartupOption.ROLLINGUPGRADE) &&
+        (option.getRollingUpgradeStartupOption() ==
+        HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK)) {
+      return true;
+    }
+    return false;
+  }
+
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
index 7d34d4f8ada..d0369aa1e5b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
@@ -181,6 +181,7 @@ class HeartbeatManager implements DatanodeStatistics {
                        -1L,
                        -1L,
                        -1L,
+                       -1L,
                        -1L};
   }

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 38202207732..2753270e42f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3903,6 +3903,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
     stats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] =
         getMissingReplOneBlocksCount();
+    stats[ClientProtocol.GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX] =
+        blockManager.getBytesInFuture();
     return stats;
   }

@@ -4299,13 +4301,25 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
    * Leave safe mode.
    * <p>
    * Check for invalid, under- & over-replicated blocks in the end of startup.
+   * @param force - true to force exit
    */
-  private synchronized void leave() {
+  private synchronized void leave(boolean force) {
     // if not done yet, initialize replication queues.
     // In the standby, do not populate repl queues
     if (!blockManager.isPopulatingReplQueues() &&
         blockManager.shouldPopulateReplQueues()) {
       blockManager.initializeReplQueues();
     }
+
+
+    if (!force && (blockManager.getBytesInFuture() > 0)) {
+      LOG.error("Refusing to leave safe mode without a force flag. " +
+          "Exiting safe mode will cause a deletion of " + blockManager
+          .getBytesInFuture() + " byte(s). Please use " +
+          "-forceExit flag to exit safe mode forcefully if data loss is " +
+          "acceptable.");
+      return;
+    }
+
     long timeInSafemode = now() - startTime;
     NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
         + timeInSafemode/1000 + " secs");
@@ -4403,7 +4417,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     // the threshold is reached or was reached before
     if (!isOn() ||                          // safe mode is off
         extension <= 0 || threshold <= 0) { // don't need to wait
-      this.leave(); // leave safe mode
+      this.leave(false); // leave safe mode
       return;
     }
     if (reached > 0) { // threshold has already been reached before
@@ -4560,6 +4574,21 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
           + "the minimum number %d. ", numLive, datanodeThreshold);
     }

+    if (blockManager.getBytesInFuture() > 0) {
+      msg += "Name node detected blocks with generation stamps " +
+          "in future. This means that Name node metadata is inconsistent. " +
+          "This can happen if Name node metadata files have been manually " +
+          "replaced. Exiting safe mode will cause loss of " + blockManager
+          .getBytesInFuture() + " byte(s). Please restart name node with " +
+          "right metadata or use \"hdfs dfsadmin -safemode forceExit\" " +
+          "if you are certain that the NameNode was started with the " +
+          "correct FsImage and edit logs. If you encountered this during " +
+          "a rollback, it is safe to exit with -safemode forceExit.";
+      return msg;
+    }
+
+
     msg += (reached > 0) ? "In safe mode extension. " : "";
     msg += "Safe mode will be turned off automatically ";
@@ -4661,7 +4690,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       }
       if (safeMode.canLeave()) {
         // Leave safe mode.
-        safeMode.leave();
+        safeMode.leave(false);
         smmthread = null;
         break;
       }
@@ -4686,11 +4715,31 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkSuperuserPrivilege();
     switch(action) {
     case SAFEMODE_LEAVE: // leave safe mode
+      if (blockManager.getBytesInFuture() > 0) {
+        LOG.error("Refusing to leave safe mode without a force flag. " +
+            "Exiting safe mode will cause a deletion of " + blockManager
+            .getBytesInFuture() + " byte(s). Please use " +
+            "-forceExit flag to exit safe mode forcefully if data loss is " +
+            "acceptable.");
+        return isInSafeMode();
+      }
       leaveSafeMode();
       break;
     case SAFEMODE_ENTER: // enter safe mode
      enterSafeMode(false);
      break;
+    case SAFEMODE_FORCE_EXIT:
+      if (blockManager.getBytesInFuture() > 0) {
+        LOG.warn("Leaving safe mode due to forceExit. This will cause a data " +
+            "loss of " + blockManager.getBytesInFuture() + " byte(s).");
+        safeMode.leave(true);
+        blockManager.clearBytesInFuture();
+      } else {
+        LOG.warn("forceExit used when normal exit would suffice. Treating " +
Treating " + + "force exit as normal safe mode exit."); + } + leaveSafeMode(); + break; default: LOG.error("Unexpected safe mode action"); } @@ -4869,7 +4918,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); return; } - safeMode.leave(); + safeMode.leave(false); } finally { writeUnlock(); } @@ -7445,5 +7494,21 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, throws IOException { return FSDirErasureCodingOp.getErasureCodingPolicy(this, src); } + + /** + * Gets number of bytes in the blocks in future generation stamps. + * + * @return number of bytes that can be deleted if exited from safe mode. + */ + public long getBytesInFuture() { + return blockManager.getBytesInFuture(); + } + + @VisibleForTesting + synchronized void enableSafeModeForTesting(Configuration conf) { + SafeModeInfo newSafemode = new SafeModeInfo(conf); + newSafemode.enter(); + this.safeMode = newSafemode; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 19c6415562f..b050cbe391b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -1373,7 +1373,7 @@ public class NameNode implements NameNodeStatusMXBean { conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name()); } - static StartupOption getStartupOption(Configuration conf) { + public static StartupOption getStartupOption(Configuration conf) { return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, StartupOption.REGULAR.toString())); } @@ -1688,6 +1688,11 @@ public class NameNode implements NameNodeStatusMXBean { return state.getLastHATransitionTime(); } + @Override //NameNodeStatusMXBean + public long getBytesWithFutureGenerationStamps() { + return getNamesystem().getBytesInFuture(); + } + /** * Shutdown the NN immediately in an ungraceful way. Used when it would be * unsafe for the NN to continue operating, e.g. during a failed HA state diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java index e3f712afafc..4f0b0011c80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java @@ -61,4 +61,10 @@ public interface NameNodeStatusMXBean { * @return the most recent HA transition time in milliseconds from the epoch. */ public long getLastHATransitionTime(); + + /** + * Gets number of bytes in blocks with future generation stamps. + * @return number of bytes that can be deleted if exited from safe mode. 
+   */
+  long getBytesWithFutureGenerationStamps();
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index 9c7a1efd06b..2c6797c052f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -462,10 +462,21 @@ public class DFSAdmin extends FsShell {
     long capacity = ds.getCapacity();
     long used = ds.getUsed();
     long remaining = ds.getRemaining();
+    long bytesInFuture = dfs.getBytesWithFutureGenerationStamps();
     long presentCapacity = used + remaining;
     boolean mode = dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET);
     if (mode) {
       System.out.println("Safe mode is ON");
+      if (bytesInFuture > 0) {
+        System.out.println("\nWARNING: ");
+        System.out.println("Name node has detected blocks with generation " +
+            "stamps in future.");
+        System.out.println("Forcing exit from safemode will cause " +
+            bytesInFuture + " byte(s) to be deleted.");
+        System.out.println("If you are sure that the NameNode was started with" +
+            " the correct metadata files then you may proceed with " +
+            "'-safemode forceExit'\n");
+      }
     }
     System.out.println("Configured Capacity: " + capacity
         + " (" + StringUtils.byteDesc(capacity) + ")");
@@ -574,6 +585,8 @@ public class DFSAdmin extends FsShell {
     } else if ("wait".equalsIgnoreCase(argv[idx])) {
       action = HdfsConstants.SafeModeAction.SAFEMODE_GET;
       waitExitSafe = true;
+    } else if ("forceExit".equalsIgnoreCase(argv[idx])){
+      action = HdfsConstants.SafeModeAction.SAFEMODE_FORCE_EXIT;
     } else {
       printUsage("-safemode");
       return;
@@ -949,7 +962,8 @@ public class DFSAdmin extends FsShell {
       "\tand etc. on all the DNs.\n" +
       "\tOptional flags may be used to filter the list of displayed DNs.\n";

-    String safemode = "-safemode <enter|leave|get|wait>: Safe mode maintenance command.\n" +
+    String safemode = "-safemode <enter|leave|get|wait|forceExit>: Safe mode " +
+        "maintenance command.\n" +
       "\t\tSafe mode is a Namenode state in which it\n" +
       "\t\t\t1. does not accept changes to the name space (read-only)\n" +
       "\t\t\t2. does not replicate or delete blocks.\n" +
@@ -1643,7 +1657,7 @@ public class DFSAdmin extends FsShell {
           + " [-report] [-live] [-dead] [-decommissioning]");
     } else if ("-safemode".equals(cmd)) {
       System.err.println("Usage: hdfs dfsadmin"
-          + " [-safemode enter | leave | get | wait]");
+          + " [-safemode enter | leave | get | wait | forceExit]");
     } else if ("-allowSnapshot".equalsIgnoreCase(cmd)) {
       System.err.println("Usage: hdfs dfsadmin"
           + " [-allowSnapshot <snapshotDir>]");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
index 604fde47c1d..65846698f14 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
+    <test>
+      <description>safemode: Test for forceExit</description>
+      <test-commands>
+        <command>-fs NAMENODE -safemode enter</command>
+        <command>-fs NAMENODE -safemode forceExit</command>
+      </test-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Safe mode is OFF</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
     <test>
       <description>safemode: Test for leave - Namenode is already in safemode</description>