HDFS-14314. fullBlockReportLeaseId should be reset after registering to NN. Contributed by star.
(cherry picked from commit 387dbe587aa66ac99ec5f5b50827ec3e0a327613) (cherry picked from commit e58ccca3ce131c955ceb115cd0b75e452eea828b) (cherry picked from commit d951497f57cf6556b0916cad08576481dfe2ae06) (cherry picked from commit e23a448e0e32ce5139b76d47c73fce621ccb66bd)
This commit is contained in:
parent
5bed24ad5f
commit
d71cfe1461
@ -105,6 +105,7 @@ static enum RunningState {
|
|||||||
private final DataNode dn;
|
private final DataNode dn;
|
||||||
private final DNConf dnConf;
|
private final DNConf dnConf;
|
||||||
private long prevBlockReportId;
|
private long prevBlockReportId;
|
||||||
|
private long fullBlockReportLeaseId;
|
||||||
private final SortedSet<Integer> blockReportSizes =
|
private final SortedSet<Integer> blockReportSizes =
|
||||||
Collections.synchronizedSortedSet(new TreeSet<Integer>());
|
Collections.synchronizedSortedSet(new TreeSet<Integer>());
|
||||||
private final int maxDataLength;
|
private final int maxDataLength;
|
||||||
@ -129,6 +130,7 @@ static enum RunningState {
|
|||||||
dnConf.ibrInterval,
|
dnConf.ibrInterval,
|
||||||
dn.getMetrics());
|
dn.getMetrics());
|
||||||
prevBlockReportId = ThreadLocalRandom.current().nextLong();
|
prevBlockReportId = ThreadLocalRandom.current().nextLong();
|
||||||
|
fullBlockReportLeaseId = 0;
|
||||||
scheduler = new Scheduler(dnConf.heartBeatInterval,
|
scheduler = new Scheduler(dnConf.heartBeatInterval,
|
||||||
dnConf.getLifelineIntervalMs(), dnConf.blockReportInterval,
|
dnConf.getLifelineIntervalMs(), dnConf.blockReportInterval,
|
||||||
dnConf.outliersReportIntervalMs);
|
dnConf.outliersReportIntervalMs);
|
||||||
@ -616,7 +618,6 @@ private void offerService() throws Exception {
|
|||||||
+ "; heartBeatInterval=" + dnConf.heartBeatInterval
|
+ "; heartBeatInterval=" + dnConf.heartBeatInterval
|
||||||
+ (lifelineSender != null ?
|
+ (lifelineSender != null ?
|
||||||
"; lifelineIntervalMs=" + dnConf.getLifelineIntervalMs() : ""));
|
"; lifelineIntervalMs=" + dnConf.getLifelineIntervalMs() : ""));
|
||||||
long fullBlockReportLeaseId = 0;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Now loop for a long time....
|
// Now loop for a long time....
|
||||||
@ -782,6 +783,10 @@ void register(NamespaceInfo nsInfo) throws IOException {
|
|||||||
LOG.info("Block pool " + this + " successfully registered with NN");
|
LOG.info("Block pool " + this + " successfully registered with NN");
|
||||||
bpos.registrationSucceeded(this, bpRegistration);
|
bpos.registrationSucceeded(this, bpRegistration);
|
||||||
|
|
||||||
|
// reset lease id whenever registered to NN.
|
||||||
|
// ask for a new lease id at the next heartbeat.
|
||||||
|
fullBlockReportLeaseId = 0;
|
||||||
|
|
||||||
// random short delay - helps scatter the BR from all DNs
|
// random short delay - helps scatter the BR from all DNs
|
||||||
scheduler.scheduleBlockReport(dnConf.initialBlockReportDelayMs);
|
scheduler.scheduleBlockReport(dnConf.initialBlockReportDelayMs);
|
||||||
}
|
}
|
||||||
|
@ -27,12 +27,12 @@
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.ConnectException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ThreadLocalRandom;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
@ -63,6 +63,8 @@
|
|||||||
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
|
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
|
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.BlockReportContext;
|
||||||
import org.apache.hadoop.ipc.RemoteException;
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
import org.apache.hadoop.ipc.StandbyException;
|
import org.apache.hadoop.ipc.StandbyException;
|
||||||
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
|
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
|
||||||
@ -92,6 +94,9 @@ public class TestBPOfferService {
|
|||||||
private static final File TEST_BUILD_DATA = PathUtils.getTestDir(TestBPOfferService.class);
|
private static final File TEST_BUILD_DATA = PathUtils.getTestDir(TestBPOfferService.class);
|
||||||
private long firstCallTime = 0;
|
private long firstCallTime = 0;
|
||||||
private long secondCallTime = 0;
|
private long secondCallTime = 0;
|
||||||
|
private long firstLeaseId = 0;
|
||||||
|
private long secondLeaseId = 0;
|
||||||
|
private long nextFullBlockReportLeaseId = 1L;
|
||||||
|
|
||||||
static {
|
static {
|
||||||
GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL);
|
GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL);
|
||||||
@ -171,16 +176,24 @@ private DatanodeProtocolClientSideTranslatorPB setupNNMock(int nnIdx)
|
|||||||
private class HeartbeatAnswer implements Answer<HeartbeatResponse> {
|
private class HeartbeatAnswer implements Answer<HeartbeatResponse> {
|
||||||
private final int nnIdx;
|
private final int nnIdx;
|
||||||
|
|
||||||
public HeartbeatAnswer(int nnIdx) {
|
HeartbeatAnswer(int nnIdx) {
|
||||||
this.nnIdx = nnIdx;
|
this.nnIdx = nnIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HeartbeatResponse answer(InvocationOnMock invocation) throws Throwable {
|
public HeartbeatResponse answer(InvocationOnMock invocation)
|
||||||
|
throws Throwable {
|
||||||
heartbeatCounts[nnIdx]++;
|
heartbeatCounts[nnIdx]++;
|
||||||
|
Boolean requestFullBlockReportLease =
|
||||||
|
(Boolean) invocation.getArguments()[8];
|
||||||
|
long fullBlockReportLeaseId = 0;
|
||||||
|
if (requestFullBlockReportLease) {
|
||||||
|
fullBlockReportLeaseId = nextFullBlockReportLeaseId++;
|
||||||
|
}
|
||||||
|
LOG.info("fullBlockReportLeaseId=" + fullBlockReportLeaseId);
|
||||||
HeartbeatResponse heartbeatResponse = new HeartbeatResponse(
|
HeartbeatResponse heartbeatResponse = new HeartbeatResponse(
|
||||||
datanodeCommands[nnIdx], mockHaStatuses[nnIdx], null,
|
datanodeCommands[nnIdx], mockHaStatuses[nnIdx], null,
|
||||||
ThreadLocalRandom.current().nextLong() | 1L);
|
fullBlockReportLeaseId);
|
||||||
//reset the command
|
//reset the command
|
||||||
datanodeCommands[nnIdx] = new DatanodeCommand[0];
|
datanodeCommands[nnIdx] = new DatanodeCommand[0];
|
||||||
return heartbeatResponse;
|
return heartbeatResponse;
|
||||||
@ -188,6 +201,24 @@ public HeartbeatResponse answer(InvocationOnMock invocation) throws Throwable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private class HeartbeatRegisterAnswer implements Answer<HeartbeatResponse> {
|
||||||
|
private final int nnIdx;
|
||||||
|
|
||||||
|
HeartbeatRegisterAnswer(int nnIdx) {
|
||||||
|
this.nnIdx = nnIdx;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HeartbeatResponse answer(InvocationOnMock invocation)
|
||||||
|
throws Throwable {
|
||||||
|
heartbeatCounts[nnIdx]++;
|
||||||
|
DatanodeCommand[] cmds = new DatanodeCommand[1];
|
||||||
|
cmds[0] = new RegisterCommand();
|
||||||
|
return new HeartbeatResponse(cmds, mockHaStatuses[nnIdx],
|
||||||
|
null, 0L);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that the BPOS can register to talk to two different NNs,
|
* Test that the BPOS can register to talk to two different NNs,
|
||||||
* sends block reports to both, etc.
|
* sends block reports to both, etc.
|
||||||
@ -523,6 +554,26 @@ private Boolean get(DatanodeProtocolClientSideTranslatorPB mockNN) {
|
|||||||
}, 500, 10000);
|
}, 500, 10000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void waitForRegistration(
|
||||||
|
final DatanodeProtocolClientSideTranslatorPB mockNN, int times)
|
||||||
|
throws Exception {
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
try {
|
||||||
|
// The DN should have register to both NNs.
|
||||||
|
// first called by connectToNNAndHandshake, then called by reRegister.
|
||||||
|
Mockito.verify(mockNN, Mockito.times(2))
|
||||||
|
.registerDatanode(Mockito.any(DatanodeRegistration.class));
|
||||||
|
return true;
|
||||||
|
} catch (Throwable t) {
|
||||||
|
LOG.info("waiting on block registerDatanode: " + t.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 500, 10000);
|
||||||
|
}
|
||||||
|
|
||||||
private ReceivedDeletedBlockInfo[] waitForBlockReceived(
|
private ReceivedDeletedBlockInfo[] waitForBlockReceived(
|
||||||
final ExtendedBlock fakeBlock,
|
final ExtendedBlock fakeBlock,
|
||||||
final DatanodeProtocolClientSideTranslatorPB mockNN) throws Exception {
|
final DatanodeProtocolClientSideTranslatorPB mockNN) throws Exception {
|
||||||
@ -866,7 +917,7 @@ public void testNNHAStateUpdateFromVersionRequest() throws Exception {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(timeout = 30000)
|
||||||
public void testRefreshNameNodes() throws Exception {
|
public void testRefreshNameNodes() throws Exception {
|
||||||
|
|
||||||
BPOfferService bpos = setupBPOSForNNs(mockDn, mockNN1, mockNN2);
|
BPOfferService bpos = setupBPOSForNNs(mockDn, mockNN1, mockNN2);
|
||||||
@ -935,4 +986,68 @@ public void testRefreshNameNodes() throws Exception {
|
|||||||
bpos.join();
|
bpos.join();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 15000)
|
||||||
|
public void testRefreshLeaseId() throws Exception {
|
||||||
|
Mockito.when(mockNN1.sendHeartbeat(
|
||||||
|
Mockito.any(DatanodeRegistration.class),
|
||||||
|
Mockito.any(StorageReport[].class),
|
||||||
|
Mockito.anyLong(),
|
||||||
|
Mockito.anyLong(),
|
||||||
|
Mockito.anyInt(),
|
||||||
|
Mockito.anyInt(),
|
||||||
|
Mockito.anyInt(),
|
||||||
|
Mockito.any(VolumeFailureSummary.class),
|
||||||
|
Mockito.anyBoolean(),
|
||||||
|
Mockito.any(SlowPeerReports.class),
|
||||||
|
Mockito.any(SlowDiskReports.class)))
|
||||||
|
//heartbeat to old NN instance
|
||||||
|
.thenAnswer(new HeartbeatAnswer(0))
|
||||||
|
//heartbeat to new NN instance with Register Command
|
||||||
|
.thenAnswer(new HeartbeatRegisterAnswer(0))
|
||||||
|
.thenAnswer(new HeartbeatAnswer(0));
|
||||||
|
|
||||||
|
Mockito.when(mockNN1.blockReport(
|
||||||
|
Mockito.any(DatanodeRegistration.class),
|
||||||
|
Mockito.anyString(),
|
||||||
|
Mockito.any(StorageBlockReport[].class),
|
||||||
|
Mockito.any(BlockReportContext.class)))
|
||||||
|
.thenAnswer(
|
||||||
|
new Answer() {
|
||||||
|
@Override
|
||||||
|
public Object answer(InvocationOnMock invocation)
|
||||||
|
throws Throwable {
|
||||||
|
BlockReportContext context =
|
||||||
|
(BlockReportContext) invocation.getArguments()[3];
|
||||||
|
long leaseId = context.getLeaseId();
|
||||||
|
LOG.info("leaseId = "+leaseId);
|
||||||
|
|
||||||
|
// leaseId == 1 means DN make block report with old leaseId
|
||||||
|
// just reject and wait until DN request for a new leaseId
|
||||||
|
if(leaseId == 1) {
|
||||||
|
firstLeaseId = leaseId;
|
||||||
|
throw new ConnectException(
|
||||||
|
"network is not reachable for test. ");
|
||||||
|
} else {
|
||||||
|
secondLeaseId = leaseId;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
BPOfferService bpos = setupBPOSForNNs(mockNN1);
|
||||||
|
bpos.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
waitForInitialization(bpos);
|
||||||
|
// Should call registration 2 times
|
||||||
|
waitForRegistration(mockNN1, 2);
|
||||||
|
assertEquals(1L, firstLeaseId);
|
||||||
|
while(secondLeaseId != 2L) {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
bpos.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user