diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index a322ca56332..db4c8e881c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -77,3 +77,5 @@ HDFS-2693. Fix synchronization issues around state transition (todd) HDFS-1972. Fencing mechanism for block invalidations and replications (todd) HDFS-2714. Fix test cases which use standalone FSNamesystems (todd) + +HDFS-2692. Fix bugs related to failover from/into safe mode. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 9dba5a0e8b4..ec978f6ea10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2481,7 +2481,10 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block public void removeBlock(Block block) { assert namesystem.hasWriteLock(); - block.setNumBytes(BlockCommand.NO_ACK); + // TODO(HA): the following causes some problems for HA: + // the SBN doesn't get block deletions until the next + // BR... 
+ // block.setNumBytes(BlockCommand.NO_ACK); addToInvalidates(block); corruptReplicas.removeFromCorruptReplicasMap(block); blocksMap.removeBlock(block); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 6fcf3b17a7d..4f485916b5d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -243,11 +243,16 @@ class Checkpointer extends Daemon { long txid = bnImage.getLastAppliedTxId(); - backupNode.namesystem.dir.setReady(); - backupNode.namesystem.setBlockTotal(); - - bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid); - bnStorage.writeAll(); + backupNode.namesystem.writeLock(); + try { + backupNode.namesystem.dir.setReady(); + backupNode.namesystem.setBlockTotal(); + + bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid); + bnStorage.writeAll(); + } finally { + backupNode.namesystem.writeUnlock(); + } if(cpCmd.needToReturnImage()) { TransferFsImage.uploadImageFromStorage( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index c82f425b3a4..6e9ea8e2875 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -65,6 +65,7 @@ import com.google.common.base.Joiner; @InterfaceStability.Evolving public class FSEditLogLoader { private final FSNamesystem fsNamesys; + private long maxGenStamp = 0; public FSEditLogLoader(FSNamesystem fsNamesys) { this.fsNamesys = 
fsNamesys; @@ -78,14 +79,19 @@ public class FSEditLogLoader { int loadFSEdits(EditLogInputStream edits, long expectedStartingTxId) throws IOException { long startTime = now(); - int numEdits = loadFSEdits(edits, true, expectedStartingTxId); - FSImage.LOG.info("Edits file " + edits.getName() - + " of size " + edits.length() + " edits # " + numEdits - + " loaded in " + (now()-startTime)/1000 + " seconds."); - return numEdits; + fsNamesys.writeLock(); + try { + int numEdits = loadFSEdits(edits, true, expectedStartingTxId); + FSImage.LOG.info("Edits file " + edits.getName() + + " of size " + edits.length() + " edits # " + numEdits + + " loaded in " + (now()-startTime)/1000 + " seconds."); + return numEdits; + } finally { + fsNamesys.writeUnlock(); + } } - int loadFSEdits(EditLogInputStream edits, boolean closeOnExit, + private int loadFSEdits(EditLogInputStream edits, boolean closeOnExit, long expectedStartingTxId) throws IOException { int numEdits = 0; @@ -95,6 +101,13 @@ public class FSEditLogLoader { numEdits = loadEditRecords(logVersion, edits, false, expectedStartingTxId); } finally { + fsNamesys.setBlockTotal(); + // Delay the notification of genstamp updates until after + // setBlockTotal() above. Otherwise, we will mark blocks + // as "safe" before they've been incorporated in the expected + // totalBlocks and threshold for SafeMode -- triggering an + // assertion failure and/or exiting safemode too early! 
+ fsNamesys.notifyGenStampUpdate(maxGenStamp); if(closeOnExit) { edits.close(); } @@ -485,9 +498,9 @@ public class FSEditLogLoader { } } - if (addCloseOp.blocks.length > 0) { - fsNamesys.notifyGenStampUpdate( - addCloseOp.blocks[addCloseOp.blocks.length - 1].getGenerationStamp()); + // Record the max genstamp seen + for (Block b : addCloseOp.blocks) { + maxGenStamp = Math.max(maxGenStamp, b.getGenerationStamp()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a85b6c921ad..16740fca899 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -418,6 +418,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, startOpt = StartupOption.REGULAR; } boolean success = false; + writeLock(); try { // We shouldn't be calling saveNamespace if we've come up in standby state. if (fsImage.recoverTransitionRead(startOpt, this) && !haEnabled) { @@ -434,6 +435,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (!success) { fsImage.close(); } + writeUnlock(); } dir.imageLoadComplete(); } @@ -3244,9 +3246,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @return true if in safe mode */ private synchronized boolean isOn() { - assert isConsistent() : " SafeMode: Inconsistent filesystem state: " - + "Total num of blocks, active blocks, or " - + "total safe blocks don't match."; + doConsistencyCheck(); return this.reached >= 0; } @@ -3362,6 +3362,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Check and trigger safe mode if needed. 
*/ private void checkMode() { + // Have to have write-lock since leaving safemode initializes + // repl queues, which requires write lock + assert hasWriteLock(); if (needEnter()) { enter(); // check if we are ready to initialize replication queues @@ -3541,16 +3544,26 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Checks consistency of the class state. - * This is costly and currently called only in assert. - * @throws IOException + * This is costly so only runs if asserts are enabled. */ - private boolean isConsistent() { - if (blockTotal == -1 && blockSafe == -1) { - return true; // manual safe mode - } + private void doConsistencyCheck() { + boolean assertsOn = false; + assert assertsOn = true; // set to true if asserts are on + if (!assertsOn) return; + + int activeBlocks = blockManager.getActiveBlockCount(); - return (blockTotal == activeBlocks) || - (blockSafe >= 0 && blockSafe <= blockTotal); + if (blockTotal == -1 && blockSafe == -1) { + return; // manual safe mode + } + if ((blockTotal != activeBlocks) && + !(blockSafe >= 0 && blockSafe <= blockTotal)) { + throw new AssertionError( + " SafeMode: Inconsistent filesystem state: " + + "SafeMode data: blockTotal=" + blockTotal + + " blockSafe=" + blockSafe + "; " + + "BlockManager data: active=" + activeBlocks); + } } } @@ -3663,7 +3676,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Set the total number of blocks in the system. */ - void setBlockTotal() { + public void setBlockTotal() { // safeMode is volatile, and may be set to null at any time SafeModeInfo safeMode = this.safeMode; if (safeMode == null) @@ -4822,10 +4835,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } public void notifyGenStampUpdate(long gs) { - LOG.info("=> notified of genstamp update for: " + gs); + if (LOG.isDebugEnabled()) { + LOG.debug("Generation stamp " + gs + " has been reached. 
" + + "Processing pending messages from DataNodes..."); + } DataNodeMessage msg = pendingDatanodeMessages.take(gs); while (msg != null) { - LOG.info("processing message: " + msg); + if (LOG.isDebugEnabled()) { + LOG.debug("Processing previously pending message: " + msg); + } try { switch (msg.getType()) { case BLOCK_RECEIVED_DELETE: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index b7b1adb479c..9bded332d14 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -49,6 +49,8 @@ public class EditLogTailer { private final FSImage image; private final FSEditLog editLog; + private volatile Throwable lastError = null; + public EditLogTailer(FSNamesystem namesystem) { this.tailerThread = new EditLogTailerThread(); this.namesystem = namesystem; @@ -81,6 +83,11 @@ public class EditLogTailer { tailerThread.interrupt(); } + @VisibleForTesting + public Throwable getLastError() { + return lastError; + } + public void catchupDuringFailover() throws IOException { Preconditions.checkState(tailerThread == null || !tailerThread.isAlive(), @@ -146,12 +153,19 @@ public class EditLogTailer { try { doTailEdits(); } catch (IOException e) { + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException)e.getCause(); + } else if (e.getCause() instanceof Error) { + throw (Error)e.getCause(); + } + // Will try again LOG.info("Got error, will try again.", e); } } catch (Throwable t) { // TODO(HA): What should we do in this case? Shutdown the standby NN? 
LOG.error("Edit log tailer received throwable", t); + lastError = t; } try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index ab7d0197d68..5bdc9300b3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1452,7 +1452,8 @@ public class MiniDFSCluster { sizes = NameNodeAdapter.getStats(nameNode.getNamesystem()); boolean isUp = false; synchronized (this) { - isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) && sizes[0] != 0); + isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) && + sizes[ClientProtocol.GET_STATS_CAPACITY_IDX] != 0); } return isUp; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index 8ac86ca5dda..c3186292d99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -410,7 +410,7 @@ public class TestDNFencing { return count; } - private void waitForDNDeletions(final MiniDFSCluster cluster) + static void waitForDNDeletions(final MiniDFSCluster cluster) throws TimeoutException, InterruptedException { GenericTestUtils.waitFor(new Supplier() { @Override @@ -426,7 +426,7 @@ public class TestDNFencing { } - private void waitForNNToIssueDeletions(final NameNode nn) + static void waitForNNToIssueDeletions(final NameNode nn) throws Exception { GenericTestUtils.waitFor(new Supplier() { @Override diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java new file mode 100644 index 00000000000..b69d7c6db4c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -0,0 +1,334 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests that exercise safemode in an HA cluster. + */ +public class TestHASafeMode { + private static final Log LOG = LogFactory.getLog(TestHASafeMode.class); + private static final int BLOCK_SIZE = 1024; + private NameNode nn0; + private NameNode nn1; + private FileSystem fs; + private MiniDFSCluster cluster; + + @Before + public void setupCluster() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .waitSafeMode(false) + .build(); + cluster.waitActive(); + + nn0 = cluster.getNameNode(0); + nn1 = cluster.getNameNode(1); + fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + + cluster.transitionToActive(0); + } + + @After + public void shutdownCluster() throws IOException { + if (cluster != null) { + assertNull(nn1.getNamesystem().getEditLogTailer().getLastError()); + cluster.shutdown(); + } + } + + private void restartStandby() throws IOException { + 
 cluster.shutdownNameNode(1); + // Set the safemode extension to be lengthy, so that the tests + // can check the safemode message after the safemode conditions + // have been achieved, without being racy. + cluster.getConfiguration(1).setInt( + DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000); + cluster.restartNameNode(1); + nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + } + + /** + * Tests the case where, while a standby is down, more blocks are + * added to the namespace, but not rolled. So, when it starts up, + * it receives notification about the new blocks during + * the safemode extension period. + */ + @Test + public void testBlocksAddedBeforeStandbyRestart() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Creating some blocks that won't be in the edit log"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L); + + banner("Restarting standby"); + restartStandby(); + + // We expect it to be stuck in safemode (not the extension) because + // the block reports are delayed (since they include blocks + // from /test2 which have too-high genstamps). + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 needs additional 3 blocks to reach")); + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." 
 + + "The reported blocks 8 has reached the threshold 0.9990 of " + + "total blocks 8. Safe mode will be turned off automatically")); + } + + /** + * Similar to {@link #testBlocksAddedBeforeStandbyRestart()} except that + * the new blocks are allocated after the SBN has restarted. So, the + * blocks were not present in the original block reports at startup + * but are reported separately by blockReceived calls. + */ + @Test + public void testBlocksAddedWhileInSafeMode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Restarting standby"); + restartStandby(); + + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 3 has reached the threshold 0.9990 of " + + "total blocks 3. Safe mode will be turned off automatically")); + + // Create a few blocks which will send blockReceived calls to the + // SBN. + banner("Creating some blocks while SBN is in safe mode"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L); + + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 8 has reached the threshold 0.9990 of " + + "total blocks 8. Safe mode will be turned off automatically")); + } + + /** + * Test for the following case proposed by ATM: + * 1. Both NNs are up, one is active. There are 100 blocks. Both are + * out of safemode. + * 2. 10 block deletions get processed by NN1. 
 NN2 enqueues these DN messages + * until it next reads from a checkpointed edits file. + * 3. NN2 gets restarted. Its queues are lost. + * 4. NN2 comes up, reads from all the finalized edits files. Concludes there + * should still be 100 blocks. + * 5. NN2 receives a block report from all the DNs, which only accounts for + * 90 blocks. It doesn't leave safemode. + * 6. NN1 dies or is transitioned to standby. + * 7. NN2 is transitioned to active. It reads all the edits from NN1. It now + * knows there should only be 90 blocks, but it's still in safemode. + * 8. NN2 doesn't ever recheck whether it should leave safemode. + * + * This is essentially the inverse of {@link #testBlocksAddedBeforeStandbyRestart()} + */ + @Test + public void testBlocksRemovedBeforeStandbyRestart() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 5*BLOCK_SIZE, (short) 3, 1L); + + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + // Delete those blocks again, so they won't get reported to the SBN + // once it starts up + banner("Removing the blocks without rolling the edit log"); + fs.delete(new Path("/test"), true); + BlockManagerTestUtil.computeAllPendingWork( + nn0.getNamesystem().getBlockManager()); + cluster.triggerHeartbeats(); + + banner("Restarting standby"); + restartStandby(); + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 needs additional 5 blocks to reach")); + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." 
 + + "The reported blocks 0 has reached the threshold 0.9990 of " + + "total blocks 0. Safe mode will be turned off automatically")); + } + + /** + * Similar to {@link #testBlocksRemovedBeforeStandbyRestart()} except that + * the blocks are removed after the SBN has restarted. So, the + * blocks were present in the original block reports at startup + * but are deleted separately later by deletion reports. + */ + @Test + public void testBlocksRemovedWhileInSafeMode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 10*BLOCK_SIZE, (short) 3, 1L); + + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Restarting standby"); + restartStandby(); + + // It will initially have all of the blocks necessary. + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 10 has reached the threshold 0.9990 of " + + "total blocks 10. Safe mode will be turned off automatically")); + + // Delete those blocks while the SBN is in safe mode - this + // should reduce it back below the threshold + banner("Removing the blocks without rolling the edit log"); + fs.delete(new Path("/test"), true); + BlockManagerTestUtil.computeAllPendingWork( + nn0.getNamesystem().getBlockManager()); + + banner("Triggering deletions on DNs and Deletion Reports"); + cluster.triggerHeartbeats(); + TestDNFencing.waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." 
 + + "The reported blocks 0 needs additional 10 blocks")); + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 has reached the threshold 0.9990 of " + + "total blocks 0. Safe mode will be turned off automatically")); + } + + /** + * Set up a namesystem with several edits, both deletions and + * additions, and failover to a new NN while that NN is in + * safemode. Ensure that it will exit safemode. + */ + @Test + public void testComplexFailoverIntoSafemode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup and enter safemode. + nn0.getRpcServer().rollEditLog(); + + banner("Creating some blocks that won't be in the edit log"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L); + + banner("Deleting the original blocks"); + fs.delete(new Path("/test"), true); + + banner("Restarting standby"); + restartStandby(); + + // We expect it to be stuck in safemode (not the extension) because + // the block reports are delayed (since they include blocks + // from /test2 which have too-high genstamps). + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." 
 + + "The reported blocks 0 needs additional 3 blocks to reach")); + + // Initiate a failover into it while it's in safemode + banner("Initiating a failover into NN1 in safemode"); + NameNodeAdapter.abortEditLogs(nn0); + cluster.transitionToActive(1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 5 has reached the threshold 0.9990 of " + + "total blocks 5. Safe mode will be turned off automatically")); + } + + /** + * Print a big banner in the test log to make debugging easier. + */ + static void banner(String string) { + LOG.info("\n\n\n\n================================================\n" + + string + "\n" + + "==================================================\n\n"); + } + +}