HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. (Contributed by Arpit Agarwal)

This commit is contained in:
Arpit Agarwal 2015-10-26 15:45:02 -07:00
parent e8aefdf08b
commit d8736eb9ca
3 changed files with 27 additions and 2 deletions

View File

@ -2218,6 +2218,9 @@ Release 2.7.2 - UNRELEASED
HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly
older NameNodes. (Tony Wu via kihwal) older NameNodes. (Tony Wu via kihwal)
HDFS-9305. Delayed heartbeat processing causes storm of subsequent
heartbeats. (Arpit Agarwal)
Release 2.7.1 - 2015-07-06 Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -538,6 +538,7 @@ class BPServiceActor implements Runnable {
HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease)
throws IOException { throws IOException {
scheduler.scheduleNextHeartbeat();
StorageReport[] reports = StorageReport[] reports =
dn.getFSDataset().getStorageReports(bpos.getBlockPoolId()); dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
@ -651,7 +652,6 @@ class BPServiceActor implements Runnable {
// //
boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) && boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
scheduler.isBlockReportDue(startTime); scheduler.isBlockReportDue(startTime);
scheduler.scheduleNextHeartbeat();
if (!dn.areHeartbeatsDisabledForTests()) { if (!dn.areHeartbeatsDisabledForTests()) {
resp = sendHeartBeat(requestBlockReportLease); resp = sendHeartBeat(requestBlockReportLease);
assert resp != null; assert resp != null;
@ -1064,7 +1064,7 @@ class BPServiceActor implements Runnable {
// NOTE(review): these rows come from a side-by-side diff view, so each row
// holds two statements; presumably the left one is the pre-change code and
// the right one is the post-change code -- confirm against the commit.
// Updates nextHeartbeatTime and returns the updated value.
long scheduleNextHeartbeat() { long scheduleNextHeartbeat() {
// Numerical overflow is possible here and is okay. // Numerical overflow is possible here and is okay.
// Left statement: the deadline advances by one heartbeat interval from the
// previously stored deadline, so a deadline far in the past stays in the past.
// Right statement: the deadline is one heartbeat interval after the current
// monotonicNow() reading, regardless of the previously stored deadline.
nextHeartbeatTime += heartbeatIntervalMs; nextHeartbeatTime = monotonicNow() + heartbeatIntervalMs;
return nextHeartbeatTime; return nextHeartbeatTime;
} }

View File

@ -144,6 +144,28 @@ public class TestBpServiceActorScheduler {
} }
} }
/**
* Regression test for HDFS-9305.
* Delayed processing of a heartbeat can cause a subsequent heartbeat
* storm.
*
* For every timestamp returned by getTimestamps(), the test schedules a
* heartbeat, then forces nextHeartbeatTime ten heartbeat intervals into the
* past, reschedules, and asserts that the heartbeat is still not due at the
* same timestamp.
*
* NOTE(review): this relies on makeMockScheduler(now) producing a scheduler
* whose clock reads {@code now}; the helper's full body is not visible
* here -- confirm.
*/
@Test
public void testScheduleDelayedHeartbeat() {
for (final long now : getTimestamps()) {
Scheduler scheduler = makeMockScheduler(now);
// Baseline: right after scheduling, the heartbeat must not be due at 'now'.
scheduler.scheduleNextHeartbeat();
assertFalse(scheduler.isHeartbeatDue(now));
// Simulate a delayed heartbeat e.g. due to slow processing by NN.
// The stored deadline is moved ten intervals behind 'now'.
scheduler.nextHeartbeatTime = now - (HEARTBEAT_INTERVAL_MS * 10);
// Rescheduling must not leave the stale deadline in the past.
scheduler.scheduleNextHeartbeat();
// Ensure that the next heartbeat is not due immediately.
assertFalse(scheduler.isHeartbeatDue(now));
}
}
private Scheduler makeMockScheduler(long now) { private Scheduler makeMockScheduler(long now) {
LOG.info("Using now = " + now); LOG.info("Using now = " + now);
Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS)); Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));