HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. Contributed by Bikas Saha.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jitendra Nath Pandey 2012-02-15 22:00:18 +00:00
parent 1fb0ab92f8
commit 3c145d3492
5 changed files with 55 additions and 23 deletions

View File

@ -202,3 +202,5 @@ HDFS-2947. On startup NN throws an NPE in the metrics system. (atm)
HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (atm)
HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd)
HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. (Bikas Saha via jitendra)

View File

@ -221,7 +221,7 @@ private void initJournals(List<URI> dirs) {
if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
StorageDirectory sd = storage.getStorageDirectory(u);
if (sd != null) {
journalSet.add(new FileJournalManager(sd), required);
journalSet.add(new FileJournalManager(sd, storage), required);
}
} else {
journalSet.add(createJournal(u), required);

View File

@ -52,6 +52,7 @@ class FileJournalManager implements JournalManager {
private static final Log LOG = LogFactory.getLog(FileJournalManager.class);
private final StorageDirectory sd;
private final NNStorage storage;
private int outputBufferCapacity = 512*1024;
private static final Pattern EDITS_REGEX = Pattern.compile(
@ -65,8 +66,9 @@ class FileJournalManager implements JournalManager {
StoragePurger purger
= new NNStorageRetentionManager.DeletionStoragePurger();
public FileJournalManager(StorageDirectory sd) {
public FileJournalManager(StorageDirectory sd, NNStorage storage) {
this.sd = sd;
this.storage = storage;
}
@Override
@ -75,11 +77,16 @@ public void close() throws IOException {}
@Override
synchronized public EditLogOutputStream startLogSegment(long txid)
throws IOException {
try {
currentInProgress = NNStorage.getInProgressEditsFile(sd, txid);
EditLogOutputStream stm = new EditLogFileOutputStream(currentInProgress,
outputBufferCapacity);
stm.create();
return stm;
} catch (IOException e) {
storage.reportErrorsOnDirectory(sd);
throw e;
}
}
@Override
@ -95,6 +102,7 @@ synchronized public void finalizeLogSegment(long firstTxId, long lastTxId)
"Can't finalize edits file " + inprogressFile + " since finalized file " +
"already exists");
if (!inprogressFile.renameTo(dstFile)) {
storage.reportErrorsOnDirectory(sd);
throw new IllegalStateException("Unable to finalize edits file " + inprogressFile);
}
if (inprogressFile.equals(currentInProgress)) {

View File

@ -29,6 +29,7 @@
import java.io.FilenameFilter;
import java.io.IOException;
import org.junit.Test;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
@ -59,7 +60,7 @@ public void testNormalOperation() throws IOException {
long numJournals = 0;
for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) {
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true));
numJournals++;
}
@ -79,7 +80,7 @@ public void testInprogressRecovery() throws IOException {
5, new AbortSpec(5, 0));
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL,
jm.getNumberOfTransactions(1, true));
}
@ -102,16 +103,16 @@ public void testInprogressRecoveryMixed() throws IOException {
5, new AbortSpec(5, 1));
Iterator<StorageDirectory> dirs = storage.dirIterator(NameNodeDirType.EDITS);
StorageDirectory sd = dirs.next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true));
sd = dirs.next();
jm = new FileJournalManager(sd);
jm = new FileJournalManager(sd, storage);
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
true));
sd = dirs.next();
jm = new FileJournalManager(sd);
jm = new FileJournalManager(sd, storage);
assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true));
}
@ -135,17 +136,17 @@ public void testInprogressRecoveryAll() throws IOException {
new AbortSpec(5, 2));
Iterator<StorageDirectory> dirs = storage.dirIterator(NameNodeDirType.EDITS);
StorageDirectory sd = dirs.next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
true));
sd = dirs.next();
jm = new FileJournalManager(sd);
jm = new FileJournalManager(sd, storage);
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
true));
sd = dirs.next();
jm = new FileJournalManager(sd);
jm = new FileJournalManager(sd, storage);
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
true));
}
@ -162,6 +163,25 @@ private void corruptAfterStartSegment(File f) throws IOException {
raf.close();
}
@Test(expected=IllegalStateException.class)
public void testFinalizeErrorReportedToNNStorage() throws IOException, InterruptedException {
File f = new File(TestEditLog.TEST_DIR + "/filejournaltestError");
// abort after 10th roll
NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()),
10, new AbortSpec(10, 0));
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
FileJournalManager jm = new FileJournalManager(sd, storage);
String sdRootPath = sd.getRoot().getAbsolutePath();
FileUtil.chmod(sdRootPath, "-w", true);
try {
jm.finalizeLogSegment(0, 1);
} finally {
assertTrue(storage.getRemovedStorageDirs().contains(sd));
FileUtil.chmod(sdRootPath, "+w", true);
}
}
/**
* Test that we can read from a stream created by FileJournalManager.
* Create a single edits directory, failing it on the final roll.
@ -176,7 +196,7 @@ public void testReadFromStream() throws IOException {
10, new AbortSpec(10, 0));
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
long expectedTotalTxnCount = TXNS_PER_ROLL*10 + TXNS_PER_FAIL;
assertEquals(expectedTotalTxnCount, jm.getNumberOfTransactions(1, true));
@ -211,7 +231,7 @@ public void testAskForTransactionsMidfile() throws IOException {
10);
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
// 10 rolls, so 11 rolled files, 110 txids total.
final int TOTAL_TXIDS = 10 * 11;
@ -248,7 +268,7 @@ public boolean accept(File dir, String name) {
assertEquals(1, files.length);
assertTrue(files[0].delete());
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
assertEquals(startGapTxId-1, jm.getNumberOfTransactions(1, true));
try {
@ -286,7 +306,7 @@ public boolean accept(File dir, String name) {
corruptAfterStartSegment(files[0]);
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
assertEquals(10*TXNS_PER_ROLL+1,
jm.getNumberOfTransactions(1, true));
}
@ -300,7 +320,8 @@ public void testGetRemoteEditLog() throws IOException {
NNStorage.getInProgressEditsFileName(201),
NNStorage.getFinalizedEditsFileName(1001, 1100));
FileJournalManager fjm = new FileJournalManager(sd);
// passing null for NNStorage because this unit test will not use it
FileJournalManager fjm = new FileJournalManager(sd, null);
assertEquals("[1,100],[101,200],[1001,1100]", getLogsAsString(fjm, 1));
assertEquals("[101,200],[1001,1100]", getLogsAsString(fjm, 101));
assertEquals("[1001,1100]", getLogsAsString(fjm, 201));
@ -336,7 +357,7 @@ public void testReadFromMiddleOfEditLog() throws CorruptionException,
10);
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
EditLogInputStream elis = jm.getInputStream(5, true);
FSEditLogOp op = elis.readOp();
@ -357,7 +378,7 @@ public void testExcludeInProgressStreams() throws CorruptionException,
10, false);
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
FileJournalManager jm = new FileJournalManager(sd);
FileJournalManager jm = new FileJournalManager(sd, storage);
// If we exclude the in-progess stream, we should only have 100 tx.
assertEquals(100, jm.getNumberOfTransactions(1, false));

View File

@ -292,8 +292,9 @@ public FSEditLog mockEditLog(StoragePurger purger) {
for (FakeRoot root : dirRoots.values()) {
if (!root.type.isOfType(NameNodeDirType.EDITS)) continue;
// passing null NNStorage for unit test because it does not use it
FileJournalManager fjm = new FileJournalManager(
root.mockStorageDir());
root.mockStorageDir(), null);
fjm.purger = purger;
jms.add(fjm);
}