HDFS-16139. Update BPServiceActor Scheduler's nextBlockReportTime atomically (#3228). Contributed by Viraj Jasani.
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
parent
97c88c97de
commit
b038042ece
|
@ -39,6 +39,7 @@ import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.ThreadLocalRandom;
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
|
@ -323,10 +324,10 @@ class BPServiceActor implements Runnable {
|
||||||
void triggerBlockReportForTests() {
|
void triggerBlockReportForTests() {
|
||||||
synchronized (ibrManager) {
|
synchronized (ibrManager) {
|
||||||
scheduler.scheduleHeartbeat();
|
scheduler.scheduleHeartbeat();
|
||||||
long oldBlockReportTime = scheduler.nextBlockReportTime;
|
long oldBlockReportTime = scheduler.getNextBlockReportTime();
|
||||||
scheduler.forceFullBlockReportNow();
|
scheduler.forceFullBlockReportNow();
|
||||||
ibrManager.notifyAll();
|
ibrManager.notifyAll();
|
||||||
while (oldBlockReportTime == scheduler.nextBlockReportTime) {
|
while (oldBlockReportTime == scheduler.getNextBlockReportTime()) {
|
||||||
try {
|
try {
|
||||||
ibrManager.wait(100);
|
ibrManager.wait(100);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
@ -1163,8 +1164,8 @@ class BPServiceActor implements Runnable {
|
||||||
// nextBlockReportTime and nextHeartbeatTime may be assigned/read
|
// nextBlockReportTime and nextHeartbeatTime may be assigned/read
|
||||||
// by testing threads (through BPServiceActor#triggerXXX), while also
|
// by testing threads (through BPServiceActor#triggerXXX), while also
|
||||||
// assigned/read by the actor thread.
|
// assigned/read by the actor thread.
|
||||||
@VisibleForTesting
|
private final AtomicLong nextBlockReportTime =
|
||||||
volatile long nextBlockReportTime = monotonicNow();
|
new AtomicLong(monotonicNow());
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
volatile long nextHeartbeatTime = monotonicNow();
|
volatile long nextHeartbeatTime = monotonicNow();
|
||||||
|
@ -1257,7 +1258,7 @@ class BPServiceActor implements Runnable {
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isBlockReportDue(long curTime) {
|
boolean isBlockReportDue(long curTime) {
|
||||||
return nextBlockReportTime - curTime <= 0;
|
return nextBlockReportTime.get() - curTime <= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isOutliersReportDue(long curTime) {
|
boolean isOutliersReportDue(long curTime) {
|
||||||
|
@ -1281,15 +1282,15 @@ class BPServiceActor implements Runnable {
|
||||||
long scheduleBlockReport(long delay, boolean isRegistration) {
|
long scheduleBlockReport(long delay, boolean isRegistration) {
|
||||||
if (delay > 0) { // send BR after random delay
|
if (delay > 0) { // send BR after random delay
|
||||||
// Numerical overflow is possible here and is okay.
|
// Numerical overflow is possible here and is okay.
|
||||||
nextBlockReportTime =
|
nextBlockReportTime.getAndSet(
|
||||||
monotonicNow() + ThreadLocalRandom.current().nextInt((int) (delay));
|
monotonicNow() + ThreadLocalRandom.current().nextInt((int) (delay)));
|
||||||
} else { // send at next heartbeat
|
} else { // send at next heartbeat
|
||||||
nextBlockReportTime = monotonicNow();
|
nextBlockReportTime.getAndSet(monotonicNow());
|
||||||
}
|
}
|
||||||
resetBlockReportTime = isRegistration; // reset future BRs for
|
resetBlockReportTime = isRegistration; // reset future BRs for
|
||||||
// randomness, post first block report to avoid regular BRs from all
|
// randomness, post first block report to avoid regular BRs from all
|
||||||
// DN's coming at one time.
|
// DN's coming at one time.
|
||||||
return nextBlockReportTime;
|
return nextBlockReportTime.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1302,8 +1303,8 @@ class BPServiceActor implements Runnable {
|
||||||
// If we have sent the first set of block reports, then wait a random
|
// If we have sent the first set of block reports, then wait a random
|
||||||
// time before we start the periodic block reports.
|
// time before we start the periodic block reports.
|
||||||
if (resetBlockReportTime) {
|
if (resetBlockReportTime) {
|
||||||
nextBlockReportTime = monotonicNow() +
|
nextBlockReportTime.getAndSet(monotonicNow() +
|
||||||
ThreadLocalRandom.current().nextInt((int)(blockReportIntervalMs));
|
ThreadLocalRandom.current().nextInt((int) (blockReportIntervalMs)));
|
||||||
resetBlockReportTime = false;
|
resetBlockReportTime = false;
|
||||||
} else {
|
} else {
|
||||||
/* say the last block report was at 8:20:14. The current report
|
/* say the last block report was at 8:20:14. The current report
|
||||||
|
@ -1313,17 +1314,16 @@ class BPServiceActor implements Runnable {
|
||||||
* 2) unexpected like 21:35:43, next report should be at 2:20:14
|
* 2) unexpected like 21:35:43, next report should be at 2:20:14
|
||||||
* on the next day.
|
* on the next day.
|
||||||
*/
|
*/
|
||||||
long factor =
|
long factor = (monotonicNow() - nextBlockReportTime.get()
|
||||||
(monotonicNow() - nextBlockReportTime + blockReportIntervalMs)
|
+ blockReportIntervalMs) / blockReportIntervalMs;
|
||||||
/ blockReportIntervalMs;
|
|
||||||
if (factor != 0) {
|
if (factor != 0) {
|
||||||
nextBlockReportTime += factor * blockReportIntervalMs;
|
nextBlockReportTime.getAndAdd(factor * blockReportIntervalMs);
|
||||||
} else {
|
} else {
|
||||||
// If the difference between the present time and the scheduled
|
// If the difference between the present time and the scheduled
|
||||||
// time is very small, the factor can be 0, so in that case, we can
|
// time is very small, the factor can be 0, so in that case, we can
|
||||||
// ignore that negligible time, spent while sending the BRs and
|
// ignore that negligible time, spent while sending the BRs and
|
||||||
// schedule the next BR after the blockReportInterval.
|
// schedule the next BR after the blockReportInterval.
|
||||||
nextBlockReportTime += blockReportIntervalMs;
|
nextBlockReportTime.getAndAdd(blockReportIntervalMs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1336,6 +1336,16 @@ class BPServiceActor implements Runnable {
|
||||||
return nextLifelineTime - monotonicNow();
|
return nextLifelineTime - monotonicNow();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long getNextBlockReportTime() {
|
||||||
|
return nextBlockReportTime.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void setNextBlockReportTime(long nextBlockReportTime) {
|
||||||
|
this.nextBlockReportTime.getAndSet(nextBlockReportTime);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapped for testing.
|
* Wrapped for testing.
|
||||||
* @return
|
* @return
|
||||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Random;
|
||||||
|
|
||||||
import static java.lang.Math.abs;
|
import static java.lang.Math.abs;
|
||||||
import static org.hamcrest.core.Is.is;
|
import static org.hamcrest.core.Is.is;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
@ -70,7 +71,7 @@ public class TestBpServiceActorScheduler {
|
||||||
Scheduler scheduler = makeMockScheduler(now);
|
Scheduler scheduler = makeMockScheduler(now);
|
||||||
scheduler.scheduleBlockReport(0, true);
|
scheduler.scheduleBlockReport(0, true);
|
||||||
assertTrue(scheduler.resetBlockReportTime);
|
assertTrue(scheduler.resetBlockReportTime);
|
||||||
assertThat(scheduler.nextBlockReportTime, is(now));
|
assertThat(scheduler.getNextBlockReportTime(), is(now));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,8 +82,8 @@ public class TestBpServiceActorScheduler {
|
||||||
final long delayMs = 10;
|
final long delayMs = 10;
|
||||||
scheduler.scheduleBlockReport(delayMs, true);
|
scheduler.scheduleBlockReport(delayMs, true);
|
||||||
assertTrue(scheduler.resetBlockReportTime);
|
assertTrue(scheduler.resetBlockReportTime);
|
||||||
assertTrue(scheduler.nextBlockReportTime - now >= 0);
|
assertTrue(scheduler.getNextBlockReportTime() - now >= 0);
|
||||||
assertTrue(scheduler.nextBlockReportTime - (now + delayMs) < 0);
|
assertTrue(scheduler.getNextBlockReportTime() - (now + delayMs) < 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,7 +97,8 @@ public class TestBpServiceActorScheduler {
|
||||||
Scheduler scheduler = makeMockScheduler(now);
|
Scheduler scheduler = makeMockScheduler(now);
|
||||||
assertTrue(scheduler.resetBlockReportTime);
|
assertTrue(scheduler.resetBlockReportTime);
|
||||||
scheduler.scheduleNextBlockReport();
|
scheduler.scheduleNextBlockReport();
|
||||||
assertTrue(scheduler.nextBlockReportTime - (now + BLOCK_REPORT_INTERVAL_MS) < 0);
|
assertTrue(scheduler.getNextBlockReportTime()
|
||||||
|
- (now + BLOCK_REPORT_INTERVAL_MS) < 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,7 +112,8 @@ public class TestBpServiceActorScheduler {
|
||||||
Scheduler scheduler = makeMockScheduler(now);
|
Scheduler scheduler = makeMockScheduler(now);
|
||||||
scheduler.resetBlockReportTime = false;
|
scheduler.resetBlockReportTime = false;
|
||||||
scheduler.scheduleNextBlockReport();
|
scheduler.scheduleNextBlockReport();
|
||||||
assertThat(scheduler.nextBlockReportTime, is(now + BLOCK_REPORT_INTERVAL_MS));
|
assertThat(scheduler.getNextBlockReportTime(),
|
||||||
|
is(now + BLOCK_REPORT_INTERVAL_MS));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,10 +132,12 @@ public class TestBpServiceActorScheduler {
|
||||||
final long blockReportDelay =
|
final long blockReportDelay =
|
||||||
BLOCK_REPORT_INTERVAL_MS + random.nextInt(2 * (int) BLOCK_REPORT_INTERVAL_MS);
|
BLOCK_REPORT_INTERVAL_MS + random.nextInt(2 * (int) BLOCK_REPORT_INTERVAL_MS);
|
||||||
final long origBlockReportTime = now - blockReportDelay;
|
final long origBlockReportTime = now - blockReportDelay;
|
||||||
scheduler.nextBlockReportTime = origBlockReportTime;
|
scheduler.setNextBlockReportTime(origBlockReportTime);
|
||||||
scheduler.scheduleNextBlockReport();
|
scheduler.scheduleNextBlockReport();
|
||||||
assertTrue(scheduler.nextBlockReportTime - now < BLOCK_REPORT_INTERVAL_MS);
|
assertTrue((scheduler.getNextBlockReportTime() - now)
|
||||||
assertTrue(((scheduler.nextBlockReportTime - origBlockReportTime) % BLOCK_REPORT_INTERVAL_MS) == 0);
|
< BLOCK_REPORT_INTERVAL_MS);
|
||||||
|
assertEquals(0, ((scheduler.getNextBlockReportTime() - origBlockReportTime)
|
||||||
|
% BLOCK_REPORT_INTERVAL_MS));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -201,7 +206,7 @@ public class TestBpServiceActorScheduler {
|
||||||
HEARTBEAT_INTERVAL_MS, LIFELINE_INTERVAL_MS,
|
HEARTBEAT_INTERVAL_MS, LIFELINE_INTERVAL_MS,
|
||||||
BLOCK_REPORT_INTERVAL_MS, OUTLIER_REPORT_INTERVAL_MS));
|
BLOCK_REPORT_INTERVAL_MS, OUTLIER_REPORT_INTERVAL_MS));
|
||||||
doReturn(now).when(mockScheduler).monotonicNow();
|
doReturn(now).when(mockScheduler).monotonicNow();
|
||||||
mockScheduler.nextBlockReportTime = now;
|
mockScheduler.setNextBlockReportTime(now);
|
||||||
mockScheduler.nextHeartbeatTime = now;
|
mockScheduler.nextHeartbeatTime = now;
|
||||||
mockScheduler.nextOutliersReportTime = now;
|
mockScheduler.nextOutliersReportTime = now;
|
||||||
return mockScheduler;
|
return mockScheduler;
|
||||||
|
|
Loading…
Reference in New Issue