HDFS-2747. Entering safe mode after starting SBN can NPE. Contributed by Uma Maheswara Rao G.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-01-16 21:46:05 +00:00
parent 1c24ae0cd8
commit 2f26475a39
3 changed files with 87 additions and 15 deletions

View File

@ -105,3 +105,5 @@ HDFS-2766. Test for case where standby partially reads log and then performs che
HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. (atm)
HDFS-2789. TestHAAdmin.testFailover is failing (eli)
HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G via todd)

View File

@ -3774,21 +3774,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void enterSafeMode(boolean resourcesLow) throws IOException {
writeLock();
try {
// Ensure that any concurrent operations have been fully synced
// before entering safe mode. This ensures that the FSImage
// is entirely stable on disk as soon as we're in safe mode.
getEditLog().logSyncAll();
if (!isInSafeMode()) {
safeMode = new SafeModeInfo(resourcesLow);
return;
}
if (resourcesLow) {
safeMode.setResourcesLow();
}
safeMode.setManual();
getEditLog().logSyncAll();
NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
+ safeMode.getTurnOffTip());
// Ensure that any concurrent operations have been fully synced
// before entering safe mode. This ensures that the FSImage
// is entirely stable on disk as soon as we're in safe mode.
boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
// Until the edit log has been opened for write, editLogStream is null, so
// logSyncAll may only be called once the edit log is open for write.
if (isEditlogOpenForWrite) {
getEditLog().logSyncAll();
}
if (!isInSafeMode()) {
safeMode = new SafeModeInfo(resourcesLow);
return;
}
if (resourcesLow) {
safeMode.setResourcesLow();
}
safeMode.setManual();
if (isEditlogOpenForWrite) {
getEditLog().logSyncAll();
}
NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
+ safeMode.getTurnOffTip());
} finally {
writeUnlock();
}

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.junit.After;
@ -95,6 +96,68 @@ public class TestHASafeMode {
nn1.getNamesystem().getEditLogTailer().interrupt();
}
/**
 * Regression test for HDFS-2747: asking the active NameNode to enter
 * safemode while it is already in startup safemode must not throw a
 * NullPointerException.
 */
@Test
public void testEnterSafeModeInANNShouldNotThrowNPE() throws Exception {
  banner("Restarting active");
  restartActive();

  final FSNamesystem fsn = nn0.getNamesystem();
  final String safeModeStatus = fsn.getSafemode();
  assertTrue("Bad safemode status: '" + safeModeStatus + "'",
      safeModeStatus.startsWith("Safe mode is ON."));

  // Issue the request twice; the namesystem must report safemode after
  // each request.
  for (int attempt = 0; attempt < 2; attempt++) {
    NameNodeAdapter.enterSafeMode(nn0, false);
    assertTrue("Failed to enter into safemode in active",
        fsn.isInSafeMode());
  }
}
/**
 * Regression test for HDFS-2747: asking the standby NameNode to enter
 * safemode while it is already in startup safemode must not throw a
 * NullPointerException.
 */
@Test
public void testEnterSafeModeInSBNShouldNotThrowNPE() throws Exception {
  final Path rolledFile = new Path("/test");
  final Path unrolledFile = new Path("/test2");

  banner("Starting with NN0 active and NN1 standby, creating some blocks");
  DFSTestUtil.createFile(fs, rolledFile, 3 * BLOCK_SIZE, (short) 3, 1L);

  // Rolling the edit log here means the restarted SBN will load the
  // namespace during startup and enter safemode.
  nn0.getRpcServer().rollEditLog();

  banner("Creating some blocks that won't be in the edit log");
  DFSTestUtil.createFile(fs, unrolledFile, 5 * BLOCK_SIZE, (short) 3, 1L);

  banner("Deleting the original blocks");
  fs.delete(rolledFile, true);

  banner("Restarting standby");
  restartStandby();

  final FSNamesystem standbyFsn = nn1.getNamesystem();
  final String safeModeStatus = standbyFsn.getSafemode();
  assertTrue("Bad safemode status: '" + safeModeStatus + "'",
      safeModeStatus.startsWith("Safe mode is ON."));

  // Issue the request twice; the namesystem must report safemode after
  // each request.
  for (int attempt = 0; attempt < 2; attempt++) {
    NameNodeAdapter.enterSafeMode(nn1, false);
    assertTrue("Failed to enter into safemode in standby",
        standbyFsn.isInSafeMode());
  }
}
/**
 * Shuts down NameNode 0, raises its safemode extension setting to 30000,
 * restarts it, and refreshes the {@code nn0} reference.
 *
 * @throws IOException propagated from the cluster shutdown/restart calls
 */
private void restartActive() throws IOException {
  final int activeIndex = 0;
  cluster.shutdownNameNode(activeIndex);

  // A lengthy safemode extension lets tests inspect the safemode message
  // after the safemode conditions are met, without racing against exit.
  cluster.getConfiguration(activeIndex).setInt(
      DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000);

  cluster.restartNameNode(activeIndex);
  nn0 = cluster.getNameNode(activeIndex);
}
/**
* Tests the case where, while a standby is down, more blocks are
* added to the namespace, but not rolled. So, when it starts up,