HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. (Contributed by Arpit Agarwal)
This commit is contained in:
parent
e8aefdf08b
commit
d8736eb9ca
|
@ -2218,6 +2218,9 @@ Release 2.7.2 - UNRELEASED
|
||||||
HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly
|
HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly
|
||||||
older NameNodes. (Tony Wu via kihwal)
|
older NameNodes. (Tony Wu via kihwal)
|
||||||
|
|
||||||
|
HDFS-9305. Delayed heartbeat processing causes storm of subsequent
|
||||||
|
heartbeats. (Arpit Agarwal)
|
||||||
|
|
||||||
Release 2.7.1 - 2015-07-06
|
Release 2.7.1 - 2015-07-06
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -538,6 +538,7 @@ class BPServiceActor implements Runnable {
|
||||||
|
|
||||||
HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease)
|
HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
scheduler.scheduleNextHeartbeat();
|
||||||
StorageReport[] reports =
|
StorageReport[] reports =
|
||||||
dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
|
dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
|
@ -651,7 +652,6 @@ class BPServiceActor implements Runnable {
|
||||||
//
|
//
|
||||||
boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
|
boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
|
||||||
scheduler.isBlockReportDue(startTime);
|
scheduler.isBlockReportDue(startTime);
|
||||||
scheduler.scheduleNextHeartbeat();
|
|
||||||
if (!dn.areHeartbeatsDisabledForTests()) {
|
if (!dn.areHeartbeatsDisabledForTests()) {
|
||||||
resp = sendHeartBeat(requestBlockReportLease);
|
resp = sendHeartBeat(requestBlockReportLease);
|
||||||
assert resp != null;
|
assert resp != null;
|
||||||
|
@ -1064,7 +1064,7 @@ class BPServiceActor implements Runnable {
|
||||||
|
|
||||||
long scheduleNextHeartbeat() {
|
long scheduleNextHeartbeat() {
|
||||||
// Numerical overflow is possible here and is okay.
|
// Numerical overflow is possible here and is okay.
|
||||||
nextHeartbeatTime += heartbeatIntervalMs;
|
nextHeartbeatTime = monotonicNow() + heartbeatIntervalMs;
|
||||||
return nextHeartbeatTime;
|
return nextHeartbeatTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -144,6 +144,28 @@ public class TestBpServiceActorScheduler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Regression test for HDFS-9305.
|
||||||
|
* Delayed processing of a heartbeat can cause a subsequent heartbeat
|
||||||
|
* storm.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testScheduleDelayedHeartbeat() {
|
||||||
|
for (final long now : getTimestamps()) {
|
||||||
|
Scheduler scheduler = makeMockScheduler(now);
|
||||||
|
scheduler.scheduleNextHeartbeat();
|
||||||
|
assertFalse(scheduler.isHeartbeatDue(now));
|
||||||
|
|
||||||
|
// Simulate a delayed heartbeat e.g. due to slow processing by NN.
|
||||||
|
scheduler.nextHeartbeatTime = now - (HEARTBEAT_INTERVAL_MS * 10);
|
||||||
|
scheduler.scheduleNextHeartbeat();
|
||||||
|
|
||||||
|
// Ensure that the next heartbeat is not due immediately.
|
||||||
|
assertFalse(scheduler.isHeartbeatDue(now));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private Scheduler makeMockScheduler(long now) {
|
private Scheduler makeMockScheduler(long now) {
|
||||||
LOG.info("Using now = " + now);
|
LOG.info("Using now = " + now);
|
||||||
Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));
|
Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));
|
||||||
|
|
Loading…
Reference in New Issue