HDFS-16143. Add Timer in EditLogTailer and de-flake TestEditLogTailer#testStandbyTriggersLogRollsWhenTailInProgressEdits (#3235)

Contributed by Viraj Jasani.

Signed-off-by: Mingliang Liu <liuml07@apache.org>
Signed-off-by: Takanobu Asanuma <tasanuma@apache.org>
Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
This commit is contained in:
Viraj Jasani 2021-08-26 13:07:38 +05:30 committed by GitHub
parent b53cae0ffb
commit aa9cdf2af6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 82 additions and 39 deletions

View File

@ -39,6 +39,16 @@ public class FakeTimer extends Timer {
nowNanos = TimeUnit.MILLISECONDS.toNanos(1000);
}
/**
* Creates a FakeTimer whose clock starts at the given value.
*
* <p>Both readings kept by this timer are seeded from the same argument:
* {@code now} stores the value unchanged, and {@code nowNanos} stores the
* same value converted from milliseconds to nanoseconds.
*
* @param time initial clock value, in milliseconds.
*/
public FakeTimer(long time) {
now = time;
nowNanos = TimeUnit.MILLISECONDS.toNanos(time);
}
@Override
public long now() {
return now;

View File

@ -36,6 +36,7 @@ import java.util.concurrent.TimeoutException;
import org.apache.hadoop.thirdparty.com.google.common.collect.Iterators;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.util.Timer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -55,12 +56,10 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.security.SecurityUtil;
import static org.apache.hadoop.util.Time.monotonicNow;
import static org.apache.hadoop.util.ExitUtil.terminate;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.util.Time;
/**
@ -172,14 +171,21 @@ public class EditLogTailer {
*/
private final long maxTxnsPerLock;
/**
* Timer instance to be set only using constructor.
* Only tests can reassign this by using setTimerForTest().
* For source code, this timer instance should be treated as final.
*/
private Timer timer;
public EditLogTailer(FSNamesystem namesystem, Configuration conf) {
this.tailerThread = new EditLogTailerThread();
this.conf = conf;
this.namesystem = namesystem;
this.timer = new Timer();
this.editLog = namesystem.getEditLog();
lastLoadTimeMs = monotonicNow();
lastRollTimeMs = monotonicNow();
this.lastLoadTimeMs = timer.monotonicNow();
this.lastRollTimeMs = timer.monotonicNow();
logRollPeriodMs = conf.getTimeDuration(
DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY,
@ -301,7 +307,7 @@ public class EditLogTailer {
long editsTailed = 0;
// Fully tail the journal to the end
do {
long startTime = Time.monotonicNow();
long startTime = timer.monotonicNow();
try {
NameNode.getNameNodeMetrics().addEditLogTailInterval(
startTime - lastLoadTimeMs);
@ -312,7 +318,7 @@ public class EditLogTailer {
throw new IOException(e);
} finally {
NameNode.getNameNodeMetrics().addEditLogTailTime(
Time.monotonicNow() - startTime);
timer.monotonicNow() - startTime);
}
} while(editsTailed > 0);
return null;
@ -336,7 +342,7 @@ public class EditLogTailer {
LOG.debug("lastTxnId: " + lastTxnId);
}
Collection<EditLogInputStream> streams;
long startTime = Time.monotonicNow();
long startTime = timer.monotonicNow();
try {
streams = editLog.selectInputStreams(lastTxnId + 1, 0,
null, inProgressOk, true);
@ -349,7 +355,7 @@ public class EditLogTailer {
return 0;
} finally {
NameNode.getNameNodeMetrics().addEditLogFetchTime(
Time.monotonicNow() - startTime);
timer.monotonicNow() - startTime);
}
if (LOG.isDebugEnabled()) {
LOG.debug("edit streams to load from: " + streams.size());
@ -374,7 +380,7 @@ public class EditLogTailer {
}
if (editsLoaded > 0) {
lastLoadTimeMs = monotonicNow();
lastLoadTimeMs = timer.monotonicNow();
}
lastLoadedTxnId = image.getLastAppliedTxId();
return editsLoaded;
@ -395,7 +401,7 @@ public class EditLogTailer {
*/
private boolean tooLongSinceLastLoad() {
return logRollPeriodMs >= 0 &&
(monotonicNow() - lastRollTimeMs) > logRollPeriodMs;
(timer.monotonicNow() - lastRollTimeMs) > logRollPeriodMs;
}
/**
@ -423,9 +429,9 @@ public class EditLogTailer {
try {
future = rollEditsRpcExecutor.submit(getNameNodeProxy());
future.get(rollEditsTimeoutMs, TimeUnit.MILLISECONDS);
lastRollTimeMs = monotonicNow();
this.lastRollTimeMs = timer.monotonicNow();
lastRollTriggerTxId = lastLoadedTxnId;
} catch (ExecutionException e) {
} catch (ExecutionException | InterruptedException e) {
LOG.warn("Unable to trigger a roll of the active NN", e);
} catch (TimeoutException e) {
if (future != null) {
@ -433,11 +439,30 @@ public class EditLogTailer {
}
LOG.warn(String.format(
"Unable to finish rolling edits in %d ms", rollEditsTimeoutMs));
} catch (InterruptedException e) {
LOG.warn("Unable to trigger a roll of the active NN", e);
}
}
/**
* Replaces the Timer used by this EditLogTailer. Intended for tests only:
* production code never reassigns the timer after the EditLogTailer
* constructor has created it.
*
* NOTE(review): the {@code timer} field is a plain (non-volatile) reference;
* confirm that tests do not depend on the new value being seen immediately
* by another thread.
*
* @param newTimer Timer instance provided by tests.
*/
@VisibleForTesting
void setTimerForTest(final Timer newTimer) {
this.timer = newTimer;
}
/**
* Exposes the Timer currently held by this EditLogTailer so that tests can
* read its clock.
*
* @return the Timer instance currently used by this EditLogTailer.
*/
@VisibleForTesting
Timer getTimer() {
return timer;
}
@VisibleForTesting
void sleep(long sleepTimeMillis) throws InterruptedException {
Thread.sleep(sleepTimeMillis);
@ -497,7 +522,7 @@ public class EditLogTailer {
// name system lock will be acquired to further block even the block
// state updates.
namesystem.cpLockInterruptibly();
long startTime = Time.monotonicNow();
long startTime = timer.monotonicNow();
try {
NameNode.getNameNodeMetrics().addEditLogTailInterval(
startTime - lastLoadTimeMs);
@ -505,7 +530,7 @@ public class EditLogTailer {
} finally {
namesystem.cpUnlock();
NameNode.getNameNodeMetrics().addEditLogTailTime(
Time.monotonicNow() - startTime);
timer.monotonicNow() - startTime);
}
//Update NameDirSize Metric
if (triggeredLogRoll) {

View File

@ -56,6 +56,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.FakeTimer;
import org.slf4j.event.Level;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -394,13 +395,15 @@ public class TestEditLogTailer {
// Time in seconds to wait before checking if edit logs are rolled while
// expecting no edit log roll
final int noLogRollWaitTime = 2;
// Time in seconds to wait before checking if edit logs are rolled while
// expecting edit log roll
// expecting edit log roll.
final int logRollWaitTime = 3;
final int logRollPeriod = standbyCatchupWaitTime + noLogRollWaitTime + 1;
final long logRollPeriodMs = TimeUnit.SECONDS.toMillis(logRollPeriod);
Configuration conf = getConf();
conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY,
standbyCatchupWaitTime + noLogRollWaitTime + 1);
conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, logRollPeriod);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
conf.setBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY, true);
@ -429,19 +432,29 @@ public class TestEditLogTailer {
waitForStandbyToCatchUpWithInProgressEdits(standby, activeTxId,
standbyCatchupWaitTime);
long curTime = standby.getNamesystem().getEditLogTailer().getTimer()
.monotonicNow();
long insufficientTimeForLogRoll = logRollPeriodMs / 3;
final FakeTimer testTimer =
new FakeTimer(curTime + insufficientTimeForLogRoll);
standby.getNamesystem().getEditLogTailer().setTimerForTest(testTimer);
Thread.sleep(2000);
for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
NameNodeAdapter.mkdirs(active, getDirPath(i),
new PermissionStatus("test", "test",
new FsPermission((short)00755)), true);
}
boolean exceptionThrown = false;
try {
checkForLogRoll(active, origTxId, noLogRollWaitTime);
fail("Expected to timeout");
} catch (TimeoutException e) {
exceptionThrown = true;
// expected
}
assertTrue(exceptionThrown);
long sufficientTimeForLogRoll = logRollPeriodMs * 3;
testTimer.advance(sufficientTimeForLogRoll);
checkForLogRoll(active, origTxId, logRollWaitTime);
} finally {
@ -452,26 +465,20 @@ public class TestEditLogTailer {
/**
* Waits, via GenericTestUtils.waitFor, until the last transaction id applied
* to the standby NameNode's FSImage is greater than or equal to
* {@code activeTxId}.
*
* @param standby NameNode whose last applied transaction id is checked.
* @param activeTxId transaction id the standby is expected to reach.
* @param maxWaitSec wait budget in seconds; it is converted to milliseconds
*                   before being passed to GenericTestUtils.waitFor.
* @throws Exception whatever GenericTestUtils.waitFor throws.
*/
private static void waitForStandbyToCatchUpWithInProgressEdits(
final NameNode standby, final long activeTxId,
int maxWaitSec) throws Exception {
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
long standbyTxId = standby.getNamesystem().getFSImage()
.getLastAppliedTxId();
return (standbyTxId >= activeTxId);
}
}, 100, maxWaitSec * 1000);
GenericTestUtils.waitFor(() -> {
long standbyTxId = standby.getNamesystem().getFSImage()
.getLastAppliedTxId();
return (standbyTxId >= activeTxId);
}, 100, TimeUnit.SECONDS.toMillis(maxWaitSec));
}
/**
* Waits, via GenericTestUtils.waitFor, until the current segment transaction
* id of the active NameNode's edit log differs from {@code origTxId}. The
* calling test treats that change as the sign that the edit log was rolled.
*
* @param active NameNode whose edit log's current segment id is checked.
* @param origTxId segment transaction id recorded before the expected roll.
* @param maxWaitSec wait budget in seconds; it is converted to milliseconds
*                   before being passed to GenericTestUtils.waitFor.
* @throws Exception whatever GenericTestUtils.waitFor throws; the calling
*                   test catches TimeoutException when no roll is expected.
*/
private static void checkForLogRoll(final NameNode active,
final long origTxId, int maxWaitSec) throws Exception {
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
long curSegmentTxId = active.getNamesystem().getFSImage().getEditLog()
.getCurSegmentTxId();
return (origTxId != curSegmentTxId);
}
}, 100, maxWaitSec * 1000);
GenericTestUtils.waitFor(() -> {
long curSegmentTxId = active.getNamesystem().getFSImage().getEditLog()
.getCurSegmentTxId();
return (origTxId != curSegmentTxId);
}, 100, TimeUnit.SECONDS.toMillis(maxWaitSec));
}
private static MiniDFSCluster createMiniDFSCluster(Configuration conf,
@ -488,4 +495,5 @@ public class TestEditLogTailer {
.build();
return cluster;
}
}