HDFS-3885. QJM: optimize log sync when JN is lagging behind. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-3077@1383039 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aa65777ef0
commit
ca4582222e
|
@ -54,3 +54,5 @@ HDFS-3893. QJM: Make QJM work with security enabled. (atm)
|
|||
HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
|
||||
|
||||
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
|
||||
|
||||
HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
|
||||
|
|
|
@ -84,7 +84,7 @@ class BookKeeperEditLogOutputStream
|
|||
@Override
|
||||
public void close() throws IOException {
|
||||
setReadyToFlush();
|
||||
flushAndSync();
|
||||
flushAndSync(true);
|
||||
try {
|
||||
lh.close();
|
||||
} catch (InterruptedException ie) {
|
||||
|
@ -130,7 +130,7 @@ class BookKeeperEditLogOutputStream
|
|||
}
|
||||
|
||||
@Override
|
||||
public void flushAndSync() throws IOException {
|
||||
public void flushAndSync(boolean durable) throws IOException {
|
||||
assert(syncLatch != null);
|
||||
try {
|
||||
syncLatch.await();
|
||||
|
|
|
@ -77,7 +77,7 @@ class QuorumOutputStream extends EditLogOutputStream {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void flushAndSync() throws IOException {
|
||||
protected void flushAndSync(boolean durable) throws IOException {
|
||||
int numReadyBytes = buf.countReadyBytes();
|
||||
if (numReadyBytes > 0) {
|
||||
int numReadyTxns = buf.countReadyTxns();
|
||||
|
|
|
@ -301,17 +301,23 @@ class Journal implements Closeable {
|
|||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
|
||||
}
|
||||
|
||||
// If the edit has already been marked as committed, we know
|
||||
// it has been fsynced on a quorum of other nodes, and we are
|
||||
// "catching up" with the rest. Hence we do not need to fsync.
|
||||
boolean isLagging = lastTxnId <= committedTxnId.get();
|
||||
boolean shouldFsync = !isLagging;
|
||||
|
||||
curSegment.writeRaw(records, 0, records.length);
|
||||
curSegment.setReadyToFlush();
|
||||
Stopwatch sw = new Stopwatch();
|
||||
sw.start();
|
||||
curSegment.flush();
|
||||
curSegment.flush(shouldFsync);
|
||||
sw.stop();
|
||||
|
||||
metrics.addSync(sw.elapsedTime(TimeUnit.MICROSECONDS));
|
||||
|
||||
if (committedTxnId.get() > lastTxnId) {
|
||||
|
||||
if (isLagging) {
|
||||
// This batch of edits has already been committed on a quorum of other
|
||||
// nodes. So, we are in "catch up" mode. This gets its own metric.
|
||||
metrics.batchesWrittenWhileLagging.incr(1);
|
||||
|
|
|
@ -114,7 +114,7 @@ class EditLogBackupOutputStream extends EditLogOutputStream {
|
|||
}
|
||||
|
||||
@Override // EditLogOutputStream
|
||||
protected void flushAndSync() throws IOException {
|
||||
protected void flushAndSync(boolean durable) throws IOException {
|
||||
assert out.getLength() == 0 : "Output buffer is not empty";
|
||||
|
||||
int numReadyTxns = doubleBuf.countReadyTxns();
|
||||
|
|
|
@ -176,7 +176,7 @@ public class EditLogFileOutputStream extends EditLogOutputStream {
|
|||
* accumulates new log records while readyBuffer will be flushed and synced.
|
||||
*/
|
||||
@Override
|
||||
public void flushAndSync() throws IOException {
|
||||
public void flushAndSync(boolean durable) throws IOException {
|
||||
if (fp == null) {
|
||||
throw new IOException("Trying to use aborted output stream");
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ public class EditLogFileOutputStream extends EditLogOutputStream {
|
|||
}
|
||||
preallocate(); // preallocate file if necessay
|
||||
doubleBuf.flushTo(fp);
|
||||
if (!shouldSkipFsyncForTests) {
|
||||
if (durable && !shouldSkipFsyncForTests) {
|
||||
fc.force(false); // metadata updates not needed
|
||||
}
|
||||
}
|
||||
|
|
|
@ -93,18 +93,24 @@ public abstract class EditLogOutputStream implements Closeable {
|
|||
/**
|
||||
* Flush and sync all data that is ready to be flush
|
||||
* {@link #setReadyToFlush()} into underlying persistent store.
|
||||
* @param durable if true, the edits should be made truly durable before
|
||||
* returning
|
||||
* @throws IOException
|
||||
*/
|
||||
abstract protected void flushAndSync() throws IOException;
|
||||
abstract protected void flushAndSync(boolean durable) throws IOException;
|
||||
|
||||
/**
|
||||
* Flush data to persistent store.
|
||||
* Collect sync metrics.
|
||||
*/
|
||||
public void flush() throws IOException {
|
||||
flush(true);
|
||||
}
|
||||
|
||||
public void flush(boolean durable) throws IOException {
|
||||
numSync++;
|
||||
long start = now();
|
||||
flushAndSync();
|
||||
flushAndSync(durable);
|
||||
long end = now();
|
||||
totalTimeSync += (end - start);
|
||||
}
|
||||
|
|
|
@ -471,12 +471,12 @@ public class JournalSet implements JournalManager {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void flushAndSync() throws IOException {
|
||||
protected void flushAndSync(final boolean durable) throws IOException {
|
||||
mapJournalsAndReportErrors(new JournalClosure() {
|
||||
@Override
|
||||
public void apply(JournalAndStream jas) throws IOException {
|
||||
if (jas.isActive()) {
|
||||
jas.getCurrentStream().flushAndSync();
|
||||
jas.getCurrentStream().flushAndSync(durable);
|
||||
}
|
||||
}
|
||||
}, "flushAndSync");
|
||||
|
|
|
@ -56,7 +56,7 @@ public class BinaryEditsVisitor implements OfflineEditsVisitor {
|
|||
@Override
|
||||
public void close(Throwable error) throws IOException {
|
||||
elfos.setReadyToFlush();
|
||||
elfos.flushAndSync();
|
||||
elfos.flushAndSync(true);
|
||||
elfos.close();
|
||||
}
|
||||
|
||||
|
|
|
@ -1222,7 +1222,7 @@ public class TestEditLog {
|
|||
elfos.create();
|
||||
elfos.writeRaw(garbage, 0, garbage.length);
|
||||
elfos.setReadyToFlush();
|
||||
elfos.flushAndSync();
|
||||
elfos.flushAndSync(true);
|
||||
elfos.close();
|
||||
elfos = null;
|
||||
file = new File(TEST_LOG_NAME);
|
||||
|
|
|
@ -55,7 +55,7 @@ public class TestEditLogFileOutputStream {
|
|||
static void flushAndCheckLength(EditLogFileOutputStream elos,
|
||||
long expectedLength) throws IOException {
|
||||
elos.setReadyToFlush();
|
||||
elos.flushAndSync();
|
||||
elos.flushAndSync(true);
|
||||
assertEquals(expectedLength, elos.getFile().length());
|
||||
}
|
||||
|
||||
|
|
|
@ -74,7 +74,7 @@ public class TestNameNodeRecovery {
|
|||
|
||||
elts.addTransactionsToLog(elfos, cache);
|
||||
elfos.setReadyToFlush();
|
||||
elfos.flushAndSync();
|
||||
elfos.flushAndSync(true);
|
||||
elfos.close();
|
||||
elfos = null;
|
||||
file = new File(TEST_LOG_NAME);
|
||||
|
|
Loading…
Reference in New Issue