From 7cac807eb336a62789bc616b4e9f9193b7d7d6c9 Mon Sep 17 00:00:00 2001
From: Jing Zhao
Date: Wed, 6 Apr 2016 10:42:59 -0700
Subject: [PATCH] HDFS-10192. Namenode safemode not coming out during failover.
 Contributed by Brahma Reddy Battula.

(cherry picked from commit 221b3a8722f84f8e9ad0a98eea38a12cc4ad2f24)
---
 .../server/blockmanagement/BlockManager.java  |  2 +-
 .../hdfs/server/namenode/FSNamesystem.java    |  1 +
 .../TestBlockManagerSafeMode.java             | 14 +++++++-
 .../server/namenode/ha/TestHASafeMode.java    | 35 +++++++++++++++++++
 4 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index e5ac4844ce5..4f68c28cc8b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1787,7 +1787,7 @@ public class BlockManager implements BlockStatsMXBean {
     return bmSafeMode.leaveSafeMode(force);
   }
 
-  void checkSafeMode() {
+  public void checkSafeMode() {
     bmSafeMode.checkSafeMode();
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 1fffbc277d8..5215d840e74 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1141,6 +1141,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       }
     } finally {
       startingActiveService = false;
+      blockManager.checkSafeMode();
       writeUnlock();
     }
   }
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
index 18fbab77023..7faaca08c2e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
@@ -65,6 +65,7 @@ public class TestBlockManagerSafeMode {
   private static final long BLOCK_THRESHOLD = (long)(BLOCK_TOTAL * THRESHOLD);
   private static final int EXTENSION = 1000; // 1 second
 
+  private FSNamesystem fsn;
   private BlockManager bm;
   private DatanodeManager dn;
   private BlockManagerSafeMode bmSafeMode;
@@ -89,7 +90,7 @@ public class TestBlockManagerSafeMode {
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
         DATANODE_NUM);
 
-    FSNamesystem fsn = mock(FSNamesystem.class);
+    fsn = mock(FSNamesystem.class);
     doReturn(true).when(fsn).hasWriteLock();
     doReturn(true).when(fsn).hasReadLock();
     doReturn(true).when(fsn).isRunning();
@@ -162,6 +163,17 @@ public class TestBlockManagerSafeMode {
     setBlockSafe(BLOCK_THRESHOLD);
     bmSafeMode.checkSafeMode();
     assertEquals(BMSafeModeStatus.EXTENSION, getSafeModeStatus());
+
+    // should stay in PENDING_THRESHOLD during transitionToActive
+    doReturn(true).when(fsn).inTransitionToActive();
+    Whitebox.setInternalState(bmSafeMode, "extension", 0);
+    setSafeModeStatus(BMSafeModeStatus.PENDING_THRESHOLD);
+    setBlockSafe(BLOCK_THRESHOLD);
+    bmSafeMode.checkSafeMode();
+    assertEquals(BMSafeModeStatus.PENDING_THRESHOLD, getSafeModeStatus());
+    doReturn(false).when(fsn).inTransitionToActive();
+    bmSafeMode.checkSafeMode();
+    assertEquals(BMSafeModeStatus.OFF, getSafeModeStatus());
   }
 
   /**
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
index 4b1d27d5b90..8b8343c7c7c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
@@ -851,4 +851,39 @@ public class TestHASafeMode {
       cluster.shutdown();
     }
   }
+
+  @Test(timeout = 60000)
+  public void testSafeModeExitAfterTransition() throws Exception {
+    DFSTestUtil.createFile(fs, new Path("/test"), 5 * BLOCK_SIZE, (short) 3,
+        1L);
+    banner("Stopping standby");
+    cluster.shutdownNameNode(1);
+    DFSTestUtil.createFile(fs, new Path("/test2"), 3 * BLOCK_SIZE, (short) 3,
+        1L);
+    // Roll edit logs to be read by standby
+    nn0.getRpcServer().rollEditLog();
+    fs.delete(new Path("/test"), true);
+    // Wait until namesystem 0 reports no blocks pending deletion
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return cluster.getNamesystem(0).getBlockManager()
+            .getPendingDeletionBlocksCount() == 0;
+      }
+    }, 1000, 10000);
+    restartStandby();
+    // Wait until namesystem 1 reports all 3 datanodes as live.
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return cluster.getNamesystem(1).getNumLiveDataNodes() == 3;
+      }
+    }, 1000, 10000);
+    cluster.triggerBlockReports();
+    NameNodeAdapter.abortEditLogs(nn0);
+    cluster.shutdownNameNode(0);
+    banner(nn1.getNamesystem().getSafemode());
+    cluster.transitionToActive(1);
+    assertSafeMode(nn1, 3, 3, 3, 0);
+  }
 }