From d8736eb9ca351b82854601ea3b1fbc3c9fab44e4 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Mon, 26 Oct 2015 15:45:02 -0700 Subject: [PATCH] HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. (Contributed by Arpit Agarwal) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/server/datanode/BPServiceActor.java | 4 ++-- .../datanode/TestBpServiceActorScheduler.java | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 478d48be219..e26abcc05fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2218,6 +2218,9 @@ Release 2.7.2 - UNRELEASED HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly older NameNodes. (Tony Wu via kihwal) + HDFS-9305. Delayed heartbeat processing causes storm of subsequent + heartbeats. (Arpit Agarwal) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 85ea6ae77a5..575e7ccd9f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -538,6 +538,7 @@ class BPServiceActor implements Runnable { HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) throws IOException { + scheduler.scheduleNextHeartbeat(); StorageReport[] reports = dn.getFSDataset().getStorageReports(bpos.getBlockPoolId()); if (LOG.isDebugEnabled()) { @@ -651,7 +652,6 @@ class BPServiceActor implements Runnable { // boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) && scheduler.isBlockReportDue(startTime); - scheduler.scheduleNextHeartbeat(); if (!dn.areHeartbeatsDisabledForTests()) { resp = sendHeartBeat(requestBlockReportLease); assert resp != null; @@ -1064,7 +1064,7 @@ class BPServiceActor implements Runnable { long scheduleNextHeartbeat() { // Numerical overflow is possible here and is okay. - nextHeartbeatTime += heartbeatIntervalMs; + nextHeartbeatTime = monotonicNow() + heartbeatIntervalMs; return nextHeartbeatTime; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java index b9b6512d57f..efdd87c8575 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java @@ -144,6 +144,28 @@ public class TestBpServiceActorScheduler { } } + + /** + * Regression test for HDFS-9305. + * Delayed processing of a heartbeat can cause a subsequent heartbeat + * storm. + */ + @Test + public void testScheduleDelayedHeartbeat() { + for (final long now : getTimestamps()) { + Scheduler scheduler = makeMockScheduler(now); + scheduler.scheduleNextHeartbeat(); + assertFalse(scheduler.isHeartbeatDue(now)); + + // Simulate a delayed heartbeat e.g. due to slow processing by NN. + scheduler.nextHeartbeatTime = now - (HEARTBEAT_INTERVAL_MS * 10); + scheduler.scheduleNextHeartbeat(); + + // Ensure that the next heartbeat is not due immediately. + assertFalse(scheduler.isHeartbeatDue(now)); + } + } + private Scheduler makeMockScheduler(long now) { LOG.info("Using now = " + now); Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));