HDFS-14557. JournalNode error: Can't scan a pre-transactional edit log. Contributed by Stephen O'Donnell.

Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
(cherry picked from commit 35e0a01d7b)
(cherry picked from commit 8414e6354b)

parent 698e74d097
commit 45ba41570b
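As background for the diffs below: the in-progress edit file is pre-allocated with "-1" filler bytes before its header is written, so if the header write never happens the first four bytes read back as the int -1, which an edit-log scanner can mistake for a very old, "pre-transactional" layout version. A minimal, self-contained sketch of that failure mode (illustrative only, not Hadoop code; the class name and file handling here are made up):

// Illustrative sketch only (not Hadoop code): shows why a failed
// pre-allocation leaves a header that reads back as the int -1.
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Arrays;

public class PreallocationSketch {
  public static void main(String[] args) throws IOException {
    File edits = File.createTempFile("edits_inprogress", null);
    edits.deleteOnExit();

    // Pre-allocate the file with "-1" filler bytes (0xFF), as the edit log
    // does before writing its header. Imagine the process dies or the disk
    // fills up at this point, so the real header is never written.
    try (RandomAccessFile raf = new RandomAccessFile(edits, "rw")) {
      byte[] filler = new byte[1024];
      Arrays.fill(filler, (byte) -1);
      raf.write(filler);
    }

    // A reader that interprets the first 4 bytes as the layout version now
    // sees -1, which looks like an ancient layout version rather than what
    // it really is: a corrupt, empty log.
    try (DataInputStream in = new DataInputStream(new FileInputStream(edits))) {
      int logVersion = in.readInt();
      System.out.println("header int = " + logVersion); // prints -1
    }
  }
}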
@@ -163,6 +163,16 @@ public class EditLogFileInputStream extends EditLogInputStream {
     } catch (EOFException eofe) {
       throw new LogHeaderCorruptException("No header found in log");
     }
+    if (logVersion == -1) {
+      // The edits in progress file is pre-allocated with 1MB of "-1" bytes
+      // when it is created, then the header is written. If the header is
+      // -1, it indicates the an exception occurred pre-allocating the file
+      // and the header was never written. Therefore this is effectively a
+      // corrupt and empty log.
+      throw new LogHeaderCorruptException("No header present in log (value " +
+          "is -1), probably due to disk space issues when it was created. " +
+          "The log has no transactions and will be sidelined.");
+    }
     // We assume future layout will also support ADD_LAYOUT_FLAGS
     if (NameNodeLayoutVersion.supports(
         LayoutVersion.Feature.ADD_LAYOUT_FLAGS, logVersion) ||

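The new exception message says the log "will be sidelined". A rough sketch of what that means in practice (illustrative only, not the actual Journal code; the method name is made up): the unusable edits_inprogress file is renamed aside with a ".empty" suffix, the suffix asserted by the TestJournal case further below, so startup can continue instead of aborting.

// Rough sketch (illustrative only, not the actual Journal code) of the
// "sideline" step referenced by the exception message above.
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

public final class SidelineSketch {

  /** Rename an unusable edits_inprogress file to "<name>.empty" and return it. */
  static File sidelineEmptyLog(File inProgressLog) throws IOException {
    File movedTo = new File(inProgressLog.getAbsolutePath() + ".empty");
    Files.move(inProgressLog.toPath(), movedTo.toPath());
    return movedTo;
  }

  public static void main(String[] args) throws IOException {
    File log = File.createTempFile("edits_inprogress", null);
    File aside = sidelineEmptyLog(log);
    aside.deleteOnExit();
    System.out.println("moved aside to " + aside);
  }
}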
@@ -1284,6 +1284,12 @@ public class FSEditLogLoader {
             + lastPos, t);
         in.resync();
         FSImage.LOG.warn("After resync, position is " + in.getPosition());
+        if (in.getPosition() <= lastPos) {
+          FSImage.LOG.warn("After resync, the position, {} is not greater " +
+              "than the previous position {}. Skipping remainder of this log.",
+              in.getPosition(), lastPos);
+          break;
+        }
         continue;
       }
       if (lastTxId == HdfsServerConstants.INVALID_TXID || txid > lastTxId) {

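The guard added above stops the loader from looping forever when resync() cannot advance past a corrupt region. A minimal, self-contained sketch of the same "make forward progress or give up" pattern (illustrative only; the int array and the resync helper are stand-ins, not FSEditLogLoader's real structures):

// Minimal sketch of the guard above: after a failed read, recovery must move
// the position forward, otherwise the rest of the log is abandoned instead of
// retrying the same offset forever.
public final class ResyncGuardSketch {

  public static void main(String[] args) {
    int[] records = {1, 2, -1, -1, 3};   // -1 marks a record that fails to parse
    int pos = 0;

    while (pos < records.length) {
      int lastPos = pos;                 // position before attempting this record
      if (records[pos] < 0) {            // simulated parse failure
        pos = resync(records, pos);      // try to skip past the bad region
        if (pos <= lastPos) {            // no forward progress: bail out cleanly
          System.out.println("resync made no progress; skipping rest of log");
          break;
        }
        continue;
      }
      System.out.println("applied record " + records[pos]);
      pos++;
    }
  }

  /** Stand-in for a resync step: advance past consecutive bad records. */
  private static int resync(int[] records, int pos) {
    while (pos < records.length && records[pos] < 0) {
      pos++;
    }
    return pos;
  }
}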
@@ -28,6 +28,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;

@@ -125,6 +126,48 @@ public class TestJournal {
     Assert.assertEquals(1, segmentState.getStartTxId());
   }
 
+  /**
+   * Test for HDFS-14557 to ensure that a edit file that failed to fully
+   * allocate and has a header byte of -1 is moved aside to allow startup
+   * to progress.
+   */
+  @Test
+  public void testEmptyEditsInProgressMovedAside() throws Exception {
+    // First, write 5 transactions to the journal
+    journal.startLogSegment(makeRI(1), 1,
+        NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1);
+    final int numTxns = 5;
+    byte[] ops = QJMTestUtil.createTxnData(1, 5);
+    journal.journal(makeRI(2), 1, 1, numTxns, ops);
+    // Now close the segment
+    journal.finalizeLogSegment(makeRI(3), 1, numTxns);
+
+    // Create a new segment creating a new edits_inprogress file
+    journal.startLogSegment(makeRI(4), 6,
+        NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1);
+    ops = QJMTestUtil.createTxnData(6, 5);
+    journal.journal(makeRI(5), 6, 6, numTxns, ops);
+    File eip = journal.getStorage().getInProgressEditLog(6);
+
+    // Now stop the journal without finalizing the segment
+    journal.close();
+
+    // Now "zero out" the EIP file with -1 bytes, similar to how it would
+    // appear if the pre-allocation failed
+    RandomAccessFile rwf = new RandomAccessFile(eip, "rw");
+    for (int i=0; i<rwf.length(); i++) {
+      rwf.write(-1);
+    }
+    rwf.close();
+
+    // Finally start the Journal again, and ensure the "zeroed out" file
+    // is renamed with a .empty extension
+    journal = new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR,
+        mockErrorReporter);
+    File movedTo = new File(eip.getAbsolutePath()+".empty");
+    assertTrue(movedTo.exists());
+  }
+
   @Test (timeout = 10000)
   public void testEpochHandling() throws Exception {
     assertEquals(0, journal.getLastPromisedEpoch());

@@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.io.FileOutputStream;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.util.EnumMap;

@@ -160,4 +161,25 @@ public class TestEditLogFileInputStream {
     }
     elis.close();
   }
+
+  /**
+   * Regression test for HDFS-14557 which verifies that an edit log filled
+   * with only "-1" bytes is moved aside and does not prevent the Journal
+   * node from starting.
+   */
+  @Test(timeout=60000)
+  public void testScanEditThatFailedDuringPreAllocate() throws Exception {
+    Configuration conf = new Configuration();
+    File editLog = new File(GenericTestUtils.getTempPath("testCorruptEditLog"));
+    FileOutputStream os = new FileOutputStream(editLog);
+    for (int i=0; i<1024; i++) {
+      os.write(-1);
+    }
+    os.close();
+    FSEditLogLoader.EditLogValidation val =
+        EditLogFileInputStream.scanEditLog(editLog, 1234, false);
+    assertEquals(true, val.hasCorruptHeader());
+    assertEquals(HdfsServerConstants.INVALID_TXID, val.getEndTxId());
+  }
+
 }