diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index d6eb3c83f7b..6a3d45bdc26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -202,3 +202,5 @@ HDFS-2947. On startup NN throws an NPE in the metrics system. (atm) HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (atm) HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd) + +HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. (Bikas Saha via jitendra) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 2c72a7d5f2b..5cd8be26a95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -221,7 +221,7 @@ private void initJournals(List dirs) { if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) { StorageDirectory sd = storage.getStorageDirectory(u); if (sd != null) { - journalSet.add(new FileJournalManager(sd), required); + journalSet.add(new FileJournalManager(sd, storage), required); } } else { journalSet.add(createJournal(u), required); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 1eca2797b44..eaaf65b5fc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -52,6 +52,7 @@ class FileJournalManager implements JournalManager { private static final Log LOG = LogFactory.getLog(FileJournalManager.class); private final StorageDirectory sd; + private final NNStorage storage; private int outputBufferCapacity = 512*1024; private static final Pattern EDITS_REGEX = Pattern.compile( @@ -65,8 +66,9 @@ class FileJournalManager implements JournalManager { StoragePurger purger = new NNStorageRetentionManager.DeletionStoragePurger(); - public FileJournalManager(StorageDirectory sd) { + public FileJournalManager(StorageDirectory sd, NNStorage storage) { this.sd = sd; + this.storage = storage; } @Override @@ -75,11 +77,16 @@ public void close() throws IOException {} @Override synchronized public EditLogOutputStream startLogSegment(long txid) throws IOException { - currentInProgress = NNStorage.getInProgressEditsFile(sd, txid); - EditLogOutputStream stm = new EditLogFileOutputStream(currentInProgress, - outputBufferCapacity); - stm.create(); - return stm; + try { + currentInProgress = NNStorage.getInProgressEditsFile(sd, txid); + EditLogOutputStream stm = new EditLogFileOutputStream(currentInProgress, + outputBufferCapacity); + stm.create(); + return stm; + } catch (IOException e) { + storage.reportErrorsOnDirectory(sd); + throw e; + } } @Override @@ -95,6 +102,7 @@ synchronized public void finalizeLogSegment(long firstTxId, long lastTxId) "Can't finalize edits file " + inprogressFile + " since finalized file " + "already exists"); if (!inprogressFile.renameTo(dstFile)) { + storage.reportErrorsOnDirectory(sd); throw new IllegalStateException("Unable to finalize edits file " + inprogressFile); } if (inprogressFile.equals(currentInProgress)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index def29365776..0ac194439d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -29,6 +29,7 @@ import java.io.FilenameFilter; import java.io.IOException; import org.junit.Test; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; @@ -59,7 +60,7 @@ public void testNormalOperation() throws IOException { long numJournals = 0; for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) { - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); numJournals++; } @@ -79,7 +80,7 @@ public void testInprogressRecovery() throws IOException { 5, new AbortSpec(5, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); } @@ -102,16 +103,16 @@ public void testInprogressRecoveryMixed() throws IOException { 5, new AbortSpec(5, 1)); Iterator dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); } @@ -135,17 +136,17 @@ public void testInprogressRecoveryAll() throws IOException { new AbortSpec(5, 2)); Iterator dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); } @@ -161,6 +162,25 @@ private void corruptAfterStartSegment(File f) throws IOException { } raf.close(); } + + @Test(expected=IllegalStateException.class) + public void testFinalizeErrorReportedToNNStorage() throws IOException, InterruptedException { + File f = new File(TestEditLog.TEST_DIR + "/filejournaltestError"); + // abort after 10th roll + NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), + 10, new AbortSpec(10, 0)); + StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); + + FileJournalManager jm = new FileJournalManager(sd, storage); + String sdRootPath = sd.getRoot().getAbsolutePath(); + FileUtil.chmod(sdRootPath, "-w", true); + try { + jm.finalizeLogSegment(0, 1); + } finally { + assertTrue(storage.getRemovedStorageDirs().contains(sd)); + FileUtil.chmod(sdRootPath, "+w", true); + } + } /** * Test that we can read from a stream created by FileJournalManager. @@ -176,7 +196,7 @@ public void testReadFromStream() throws IOException { 10, new AbortSpec(10, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); long expectedTotalTxnCount = TXNS_PER_ROLL*10 + TXNS_PER_FAIL; assertEquals(expectedTotalTxnCount, jm.getNumberOfTransactions(1, true)); @@ -211,7 +231,7 @@ public void testAskForTransactionsMidfile() throws IOException { 10); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); // 10 rolls, so 11 rolled files, 110 txids total. final int TOTAL_TXIDS = 10 * 11; @@ -248,7 +268,7 @@ public boolean accept(File dir, String name) { assertEquals(1, files.length); assertTrue(files[0].delete()); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(startGapTxId-1, jm.getNumberOfTransactions(1, true)); try { @@ -286,7 +306,7 @@ public boolean accept(File dir, String name) { corruptAfterStartSegment(files[0]); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(10*TXNS_PER_ROLL+1, jm.getNumberOfTransactions(1, true)); } @@ -300,7 +320,8 @@ public void testGetRemoteEditLog() throws IOException { NNStorage.getInProgressEditsFileName(201), NNStorage.getFinalizedEditsFileName(1001, 1100)); - FileJournalManager fjm = new FileJournalManager(sd); + // passing null for NNStorage because this unit test will not use it + FileJournalManager fjm = new FileJournalManager(sd, null); assertEquals("[1,100],[101,200],[1001,1100]", getLogsAsString(fjm, 1)); assertEquals("[101,200],[1001,1100]", getLogsAsString(fjm, 101)); assertEquals("[1001,1100]", getLogsAsString(fjm, 201)); @@ -336,7 +357,7 @@ public void testReadFromMiddleOfEditLog() throws CorruptionException, 10); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); EditLogInputStream elis = jm.getInputStream(5, true); FSEditLogOp op = elis.readOp(); @@ -357,7 +378,7 @@ public void testExcludeInProgressStreams() throws CorruptionException, 10, false); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); // If we exclude the in-progess stream, we should only have 100 tx. assertEquals(100, jm.getNumberOfTransactions(1, false)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java index 6ff91f41a28..4c6334f53ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java @@ -292,8 +292,9 @@ public FSEditLog mockEditLog(StoragePurger purger) { for (FakeRoot root : dirRoots.values()) { if (!root.type.isOfType(NameNodeDirType.EDITS)) continue; + // passing null NNStorage for unit test because it does not use it FileJournalManager fjm = new FileJournalManager( - root.mockStorageDir()); + root.mockStorageDir(), null); fjm.purger = purger; jms.add(fjm); }