HDFS-14557. JournalNode error: Can't scan a pre-transactional edit log. Contributed by Stephen O'Donnell.
Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
(cherry picked from commit 35e0a01d7b)
This commit is contained in:
parent
0c02217d9b
commit
8414e6354b
|
@ -163,6 +163,16 @@ public class EditLogFileInputStream extends EditLogInputStream {
|
||||||
} catch (EOFException eofe) {
|
} catch (EOFException eofe) {
|
||||||
throw new LogHeaderCorruptException("No header found in log");
|
throw new LogHeaderCorruptException("No header found in log");
|
||||||
}
|
}
|
||||||
|
if (logVersion == -1) {
|
||||||
|
// The edits in progress file is pre-allocated with 1MB of "-1" bytes
|
||||||
|
// when it is created, then the header is written. If the header is
|
||||||
|
// -1, it indicates that an exception occurred pre-allocating the file
|
||||||
|
// and the header was never written. Therefore this is effectively a
|
||||||
|
// corrupt and empty log.
|
||||||
|
throw new LogHeaderCorruptException("No header present in log (value " +
|
||||||
|
"is -1), probably due to disk space issues when it was created. " +
|
||||||
|
"The log has no transactions and will be sidelined.");
|
||||||
|
}
|
||||||
// We assume future layout will also support ADD_LAYOUT_FLAGS
|
// We assume future layout will also support ADD_LAYOUT_FLAGS
|
||||||
if (NameNodeLayoutVersion.supports(
|
if (NameNodeLayoutVersion.supports(
|
||||||
LayoutVersion.Feature.ADD_LAYOUT_FLAGS, logVersion) ||
|
LayoutVersion.Feature.ADD_LAYOUT_FLAGS, logVersion) ||
|
||||||
|
|
|
@ -1285,6 +1285,12 @@ public class FSEditLogLoader {
|
||||||
+ lastPos, t);
|
+ lastPos, t);
|
||||||
in.resync();
|
in.resync();
|
||||||
FSImage.LOG.warn("After resync, position is " + in.getPosition());
|
FSImage.LOG.warn("After resync, position is " + in.getPosition());
|
||||||
|
if (in.getPosition() <= lastPos) {
|
||||||
|
FSImage.LOG.warn("After resync, the position, {} is not greater " +
|
||||||
|
"than the previous position {}. Skipping remainder of this log.",
|
||||||
|
in.getPosition(), lastPos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (lastTxId == HdfsServerConstants.INVALID_TXID || txid > lastTxId) {
|
if (lastTxId == HdfsServerConstants.INVALID_TXID || txid > lastTxId) {
|
||||||
|
|
|
@ -28,6 +28,7 @@ import java.io.ByteArrayOutputStream;
|
||||||
import java.io.DataOutputStream;
|
import java.io.DataOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
|
@ -125,6 +126,48 @@ public class TestJournal {
|
||||||
Assert.assertEquals(1, segmentState.getStartTxId());
|
Assert.assertEquals(1, segmentState.getStartTxId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for HDFS-14557 to ensure that an edit file that failed to fully
|
||||||
|
* allocate and has a header byte of -1 is moved aside to allow startup
|
||||||
|
* to progress.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testEmptyEditsInProgressMovedAside() throws Exception {
|
||||||
|
// First, write 5 transactions to the journal
|
||||||
|
journal.startLogSegment(makeRI(1), 1,
|
||||||
|
NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1);
|
||||||
|
final int numTxns = 5;
|
||||||
|
byte[] ops = QJMTestUtil.createTxnData(1, 5);
|
||||||
|
journal.journal(makeRI(2), 1, 1, numTxns, ops);
|
||||||
|
// Now close the segment
|
||||||
|
journal.finalizeLogSegment(makeRI(3), 1, numTxns);
|
||||||
|
|
||||||
|
// Create a new segment creating a new edits_inprogress file
|
||||||
|
journal.startLogSegment(makeRI(4), 6,
|
||||||
|
NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1);
|
||||||
|
ops = QJMTestUtil.createTxnData(6, 5);
|
||||||
|
journal.journal(makeRI(5), 6, 6, numTxns, ops);
|
||||||
|
File eip = journal.getStorage().getInProgressEditLog(6);
|
||||||
|
|
||||||
|
// Now stop the journal without finalizing the segment
|
||||||
|
journal.close();
|
||||||
|
|
||||||
|
// Now "zero out" the EIP file with -1 bytes, similar to how it would
|
||||||
|
// appear if the pre-allocation failed
|
||||||
|
RandomAccessFile rwf = new RandomAccessFile(eip, "rw");
|
||||||
|
for (int i=0; i<rwf.length(); i++) {
|
||||||
|
rwf.write(-1);
|
||||||
|
}
|
||||||
|
rwf.close();
|
||||||
|
|
||||||
|
// Finally start the Journal again, and ensure the "zeroed out" file
|
||||||
|
// is renamed with a .empty extension
|
||||||
|
journal = new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR,
|
||||||
|
mockErrorReporter);
|
||||||
|
File movedTo = new File(eip.getAbsolutePath()+".empty");
|
||||||
|
assertTrue(movedTo.exists());
|
||||||
|
}
|
||||||
|
|
||||||
@Test (timeout = 10000)
|
@Test (timeout = 10000)
|
||||||
public void testEpochHandling() throws Exception {
|
public void testEpochHandling() throws Exception {
|
||||||
assertEquals(0, journal.getLastPromisedEpoch());
|
assertEquals(0, journal.getLastPromisedEpoch());
|
||||||
|
|
|
@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.RandomAccessFile;
|
import java.io.RandomAccessFile;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.EnumMap;
|
import java.util.EnumMap;
|
||||||
|
@ -160,4 +161,25 @@ public class TestEditLogFileInputStream {
|
||||||
}
|
}
|
||||||
elis.close();
|
elis.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Regression test for HDFS-14557 which verifies that an edit log filled
|
||||||
|
* with only "-1" bytes is moved aside and does not prevent the Journal
|
||||||
|
* node from starting.
|
||||||
|
*/
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testScanEditThatFailedDuringPreAllocate() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
File editLog = new File(GenericTestUtils.getTempPath("testCorruptEditLog"));
|
||||||
|
FileOutputStream os = new FileOutputStream(editLog);
|
||||||
|
for (int i=0; i<1024; i++) {
|
||||||
|
os.write(-1);
|
||||||
|
}
|
||||||
|
os.close();
|
||||||
|
FSEditLogLoader.EditLogValidation val =
|
||||||
|
EditLogFileInputStream.scanEditLog(editLog, 1234, false);
|
||||||
|
assertEquals(true, val.hasCorruptHeader());
|
||||||
|
assertEquals(HdfsServerConstants.INVALID_TXID, val.getEndTxId());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue