HDFS-2747. Entering safe mode after starting SBN can NPE. Contributed by Uma Maheswara Rao G.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1c24ae0cd8
commit
2f26475a39
|
@ -105,3 +105,5 @@ HDFS-2766. Test for case where standby partially reads log and then performs che
|
|||
HDFS-2738. FSEditLog.selectInputStreams is reading through in-progress streams even when non-in-progress are requested. (atm)
|
||||
|
||||
HDFS-2789. TestHAAdmin.testFailover is failing (eli)
|
||||
|
||||
HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G via todd)
|
||||
|
|
|
@ -3774,21 +3774,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
void enterSafeMode(boolean resourcesLow) throws IOException {
|
||||
writeLock();
|
||||
try {
|
||||
// Ensure that any concurrent operations have been fully synced
|
||||
// before entering safe mode. This ensures that the FSImage
|
||||
// is entirely stable on disk as soon as we're in safe mode.
|
||||
getEditLog().logSyncAll();
|
||||
if (!isInSafeMode()) {
|
||||
safeMode = new SafeModeInfo(resourcesLow);
|
||||
return;
|
||||
}
|
||||
if (resourcesLow) {
|
||||
safeMode.setResourcesLow();
|
||||
}
|
||||
safeMode.setManual();
|
||||
getEditLog().logSyncAll();
|
||||
NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
|
||||
+ safeMode.getTurnOffTip());
|
||||
// Ensure that any concurrent operations have been fully synced
|
||||
// before entering safe mode. This ensures that the FSImage
|
||||
// is entirely stable on disk as soon as we're in safe mode.
|
||||
boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
|
||||
// Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
|
||||
// logSyncAll can be called only when the Editlog is in OpenForWrite mode
|
||||
if (isEditlogOpenForWrite) {
|
||||
getEditLog().logSyncAll();
|
||||
}
|
||||
if (!isInSafeMode()) {
|
||||
safeMode = new SafeModeInfo(resourcesLow);
|
||||
return;
|
||||
}
|
||||
if (resourcesLow) {
|
||||
safeMode.setResourcesLow();
|
||||
}
|
||||
safeMode.setManual();
|
||||
if (isEditlogOpenForWrite) {
|
||||
getEditLog().logSyncAll();
|
||||
}
|
||||
NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
|
||||
+ safeMode.getTurnOffTip());
|
||||
} finally {
|
||||
writeUnlock();
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
|
|||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.junit.After;
|
||||
|
@ -95,6 +96,68 @@ public class TestHASafeMode {
|
|||
nn1.getNamesystem().getEditLogTailer().interrupt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test case for entering safemode on the active namenode when it is already in startup safemode.
|
||||
* It is a regression test for HDFS-2747.
|
||||
*/
|
||||
@Test
|
||||
public void testEnterSafeModeInANNShouldNotThrowNPE() throws Exception {
|
||||
banner("Restarting active");
|
||||
restartActive();
|
||||
FSNamesystem namesystem = nn0.getNamesystem();
|
||||
String status = namesystem.getSafemode();
|
||||
assertTrue("Bad safemode status: '" + status + "'", status
|
||||
.startsWith("Safe mode is ON."));
|
||||
NameNodeAdapter.enterSafeMode(nn0, false);
|
||||
assertTrue("Failed to enter into safemode in active", namesystem
|
||||
.isInSafeMode());
|
||||
NameNodeAdapter.enterSafeMode(nn0, false);
|
||||
assertTrue("Failed to enter into safemode in active", namesystem
|
||||
.isInSafeMode());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test case for entering safemode on the standby namenode when it is already in startup safemode.
|
||||
* It is a regression test for HDFS-2747.
|
||||
*/
|
||||
@Test
|
||||
public void testEnterSafeModeInSBNShouldNotThrowNPE() throws Exception {
|
||||
banner("Starting with NN0 active and NN1 standby, creating some blocks");
|
||||
DFSTestUtil
|
||||
.createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L);
|
||||
// Roll edit log so that, when the SBN restarts, it will load
|
||||
// the namespace during startup and enter safemode.
|
||||
nn0.getRpcServer().rollEditLog();
|
||||
banner("Creating some blocks that won't be in the edit log");
|
||||
DFSTestUtil.createFile(fs, new Path("/test2"), 5 * BLOCK_SIZE, (short) 3,
|
||||
1L);
|
||||
banner("Deleting the original blocks");
|
||||
fs.delete(new Path("/test"), true);
|
||||
banner("Restarting standby");
|
||||
restartStandby();
|
||||
FSNamesystem namesystem = nn1.getNamesystem();
|
||||
String status = namesystem.getSafemode();
|
||||
assertTrue("Bad safemode status: '" + status + "'", status
|
||||
.startsWith("Safe mode is ON."));
|
||||
NameNodeAdapter.enterSafeMode(nn1, false);
|
||||
assertTrue("Failed to enter into safemode in standby", namesystem
|
||||
.isInSafeMode());
|
||||
NameNodeAdapter.enterSafeMode(nn1, false);
|
||||
assertTrue("Failed to enter into safemode in standby", namesystem
|
||||
.isInSafeMode());
|
||||
}
|
||||
|
||||
private void restartActive() throws IOException {
|
||||
cluster.shutdownNameNode(0);
|
||||
// Set the safemode extension to be lengthy, so that the tests
|
||||
// can check the safemode message after the safemode conditions
|
||||
// have been achieved, without being racy.
|
||||
cluster.getConfiguration(0).setInt(
|
||||
DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000);
|
||||
cluster.restartNameNode(0);
|
||||
nn0 = cluster.getNameNode(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the case where, while a standby is down, more blocks are
|
||||
* added to the namespace, but not rolled. So, when it starts up,
|
||||
|
|
Loading…
Reference in New Issue