From 653ef52ef267b5a46642e75ab08bd34ddae5503d Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Mon, 26 Oct 2015 15:45:02 -0700 Subject: [PATCH] HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. (Contributed by Arpit Agarwal) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/server/datanode/BPServiceActor.java | 4 ++-- .../datanode/TestBpServiceActorScheduler.java | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0ff77332eac..50bc0c40c24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -75,6 +75,9 @@ Release 2.7.2 - UNRELEASED HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly older NameNodes. (Tony Wu via kihwal) + HDFS-9305. Delayed heartbeat processing causes storm of subsequent + heartbeats. (Arpit Agarwal) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index b497cb6caed..45b11231260 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -539,6 +539,7 @@ class BPServiceActor implements Runnable { } HeartbeatResponse sendHeartBeat() throws IOException { + scheduler.scheduleNextHeartbeat(); StorageReport[] reports = dn.getFSDataset().getStorageReports(bpos.getBlockPoolId()); if (LOG.isDebugEnabled()) { @@ -648,7 +649,6 @@ class BPServiceActor implements Runnable { // -- Total capacity // -- Bytes remaining // - scheduler.scheduleNextHeartbeat(); if (!dn.areHeartbeatsDisabledForTests()) { HeartbeatResponse resp = sendHeartBeat(); assert resp != null; @@ -1038,7 +1038,7 @@ class BPServiceActor implements Runnable { long scheduleNextHeartbeat() { // Numerical overflow is possible here and is okay. - nextHeartbeatTime += heartbeatIntervalMs; + nextHeartbeatTime = monotonicNow() + heartbeatIntervalMs; return nextHeartbeatTime; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java index 0d7484c8667..258a14bb1aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java @@ -144,6 +144,28 @@ public class TestBpServiceActorScheduler { } } + + /** + * Regression test for HDFS-9305. + * Delayed processing of a heartbeat can cause a subsequent heartbeat + * storm. + */ + @Test + public void testScheduleDelayedHeartbeat() { + for (final long now : getTimestamps()) { + Scheduler scheduler = makeMockScheduler(now); + scheduler.scheduleNextHeartbeat(); + assertFalse(scheduler.isHeartbeatDue(now)); + + // Simulate a delayed heartbeat e.g. due to slow processing by NN. + scheduler.nextHeartbeatTime = now - (HEARTBEAT_INTERVAL_MS * 10); + scheduler.scheduleNextHeartbeat(); + + // Ensure that the next heartbeat is not due immediately. + assertFalse(scheduler.isHeartbeatDue(now)); + } + } + private Scheduler makeMockScheduler(long now) { LOG.info("Using now = " + now); Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));