HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826. Contributed by Matt Foley.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1141658 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f853b52a3b
commit
5147e283ad
|
@ -554,6 +554,9 @@ Trunk (unreleased changes)
|
|||
|
||||
BUG FIXES
|
||||
|
||||
HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
|
||||
(mattf)
|
||||
|
||||
HDFS-2061. Two minor bugs in BlockManager block report processing. (mattf)
|
||||
|
||||
HDFS-1449. Fix test failures - ExtendedBlock must return
|
||||
|
|
|
@ -1228,6 +1228,11 @@ public class FSEditLog implements NNStorageListener {
|
|||
@Override // NNStorageListener
|
||||
public synchronized void errorOccurred(StorageDirectory sd)
|
||||
throws IOException {
|
||||
if (editStreams == null) {
|
||||
//errors can occur on storage directories
|
||||
//before edit streams have been set up
|
||||
return;
|
||||
}
|
||||
ArrayList<EditLogOutputStream> errorStreams
|
||||
= new ArrayList<EditLogOutputStream>();
|
||||
|
||||
|
|
|
@ -397,7 +397,12 @@ public class FSImage implements NNStorageListener, Closeable {
|
|||
LOG.info("Upgrade of " + sd.getRoot() + " is complete.");
|
||||
}
|
||||
isUpgradeFinalized = false;
|
||||
storage.reportErrorsOnDirectories(errorSDs);
|
||||
if (!errorSDs.isEmpty()) {
|
||||
storage.reportErrorsOnDirectories(errorSDs);
|
||||
//during upgrade, it's a fatal error to fail any storage directory
|
||||
throw new IOException("Upgrade failed in " + errorSDs.size()
|
||||
+ " storage directory(ies), previously logged.");
|
||||
}
|
||||
storage.initializeDistributedUpgrade();
|
||||
editLog.open();
|
||||
}
|
||||
|
|
|
@ -905,7 +905,7 @@ public class NNStorage extends Storage implements Closeable {
|
|||
*/
|
||||
void reportErrorsOnDirectory(StorageDirectory sd)
|
||||
throws IOException {
|
||||
LOG.warn("Error reported on storage directory " + sd);
|
||||
LOG.error("Error reported on storage directory " + sd);
|
||||
|
||||
String lsd = listStorageDirectories();
|
||||
LOG.debug("current list of storage dirs:" + lsd);
|
||||
|
@ -914,12 +914,12 @@ public class NNStorage extends Storage implements Closeable {
|
|||
listener.errorOccurred(sd);
|
||||
}
|
||||
|
||||
LOG.info("About to remove corresponding storage: "
|
||||
LOG.warn("About to remove corresponding storage: "
|
||||
+ sd.getRoot().getAbsolutePath());
|
||||
try {
|
||||
sd.unlock();
|
||||
} catch (Exception e) {
|
||||
LOG.info("Unable to unlock bad storage directory: "
|
||||
LOG.warn("Unable to unlock bad storage directory: "
|
||||
+ sd.getRoot().getPath(), e);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,18 +22,19 @@ import static org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType.NAME_N
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.hdfs.protocol.FSConstants;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
|
||||
import org.apache.hadoop.hdfs.server.namenode.TestParallelImageWrite;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
|
@ -44,8 +45,7 @@ import static org.junit.Assert.*;
|
|||
*/
|
||||
public class TestDFSUpgrade {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.hdfs.TestDFSUpgrade");
|
||||
private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName());
|
||||
private Configuration conf;
|
||||
private int testCounter = 0;
|
||||
private MiniDFSCluster cluster = null;
|
||||
|
@ -111,11 +111,27 @@ public class TestDFSUpgrade {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to start a NameNode with the given operation. Starting
|
||||
* the NameNode should throw an exception.
|
||||
*/
|
||||
void startNameNodeShouldFail(StartupOption operation) {
|
||||
startNameNodeShouldFail(operation, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to start a NameNode with the given operation. Starting
|
||||
* the NameNode should throw an exception.
|
||||
* @param operation - NameNode startup operation
|
||||
* @param exceptionClass - if non-null, will check that the caught exception
|
||||
* is assignment-compatible with exceptionClass
|
||||
* @param messagePattern - if non-null, will check that a substring of the
|
||||
* message from the caught exception matches this pattern, via the
|
||||
* {@link Matcher#find()} method.
|
||||
*/
|
||||
void startNameNodeShouldFail(StartupOption operation,
|
||||
Class<? extends Exception> exceptionClass, Pattern messagePattern) {
|
||||
try {
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
|
||||
.startupOption(operation)
|
||||
|
@ -123,9 +139,23 @@ public class TestDFSUpgrade {
|
|||
.manageDataDfsDirs(false)
|
||||
.manageNameDfsDirs(false)
|
||||
.build(); // should fail
|
||||
throw new AssertionError("NameNode should have failed to start");
|
||||
} catch (Exception expected) {
|
||||
// expected
|
||||
fail("NameNode should have failed to start");
|
||||
|
||||
} catch (Exception e) {
|
||||
// expect exception
|
||||
if (exceptionClass != null) {
|
||||
assertTrue("Caught exception is not of expected class "
|
||||
+ exceptionClass.getSimpleName() + ": "
|
||||
+ StringUtils.stringifyException(e),
|
||||
exceptionClass.isInstance(e));
|
||||
}
|
||||
if (messagePattern != null) {
|
||||
assertTrue("Caught exception message string does not match expected pattern \""
|
||||
+ messagePattern.pattern() + "\" : "
|
||||
+ StringUtils.stringifyException(e),
|
||||
messagePattern.matcher(e.getMessage()).find());
|
||||
}
|
||||
LOG.info("Successfully detected expected NameNode startup failure.");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -155,6 +185,11 @@ public class TestDFSUpgrade {
|
|||
.build();
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void initialize() throws Exception {
|
||||
UpgradeUtilities.initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
* This test attempts to upgrade the NameNode and DataNode under
|
||||
* a number of valid and invalid conditions.
|
||||
|
@ -162,8 +197,6 @@ public class TestDFSUpgrade {
|
|||
@Test
|
||||
public void testUpgrade() throws Exception {
|
||||
File[] baseDirs;
|
||||
UpgradeUtilities.initialize();
|
||||
|
||||
StorageInfo storageInfo = null;
|
||||
for (int numDirs = 1; numDirs <= 2; numDirs++) {
|
||||
conf = new HdfsConfiguration();
|
||||
|
@ -312,6 +345,30 @@ public class TestDFSUpgrade {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Stand-alone test to detect failure of one SD during parallel upgrade.
|
||||
* At this time, can only be done with manual hack of {@link FSImage.doUpgrade()}
|
||||
*/
|
||||
@Ignore
|
||||
public void testUpgrade4() throws Exception {
|
||||
int numDirs = 4;
|
||||
conf = new HdfsConfiguration();
|
||||
conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
|
||||
conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
|
||||
String[] nameNodeDirs = conf.getStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
|
||||
|
||||
log("NameNode upgrade with one bad storage dir", numDirs);
|
||||
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
|
||||
try {
|
||||
// assert("storage dir has been prepared for failure before reaching this point");
|
||||
startNameNodeShouldFail(StartupOption.UPGRADE, IOException.class,
|
||||
Pattern.compile("failed in 1 storage"));
|
||||
} finally {
|
||||
// assert("storage dir shall be returned to normal state before exiting");
|
||||
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expected=IOException.class)
|
||||
public void testUpgradeFromPreUpgradeLVFails() throws IOException {
|
||||
// Upgrade from versions prior to Storage#LAST_UPGRADABLE_LAYOUT_VERSION
|
||||
|
@ -320,6 +377,7 @@ public class TestDFSUpgrade {
|
|||
fail("Expected IOException is not thrown");
|
||||
}
|
||||
|
||||
@Ignore
|
||||
public void test203LayoutVersion() {
|
||||
for (int lv : Storage.LAYOUT_VERSIONS_203) {
|
||||
assertTrue(Storage.is203LayoutVersion(lv));
|
||||
|
@ -327,7 +385,9 @@ public class TestDFSUpgrade {
|
|||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestDFSUpgrade().testUpgrade();
|
||||
TestDFSUpgrade t = new TestDFSUpgrade();
|
||||
TestDFSUpgrade.initialize();
|
||||
t.testUpgrade();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue