HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah.
This commit is contained in:
parent
cbea5d2db4
commit
ea11590aad
|
@ -814,6 +814,9 @@ Release 2.7.1 - UNRELEASED
|
|||
HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block
|
||||
request is in edit log. (Rushabh S Shah via kihwal)
|
||||
|
||||
HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor
|
||||
goes for infinite loop (Rushabh S Shah via kihwal)
|
||||
|
||||
Release 2.7.0 - 2015-04-20
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -43,6 +44,9 @@ public class ErrorReportAction implements BPServiceActorAction {
|
|||
DatanodeRegistration bpRegistration) throws BPServiceActorActionException {
|
||||
try {
|
||||
bpNamenode.errorReport(bpRegistration, errorCode, errorMessage);
|
||||
} catch (RemoteException re) {
|
||||
DataNode.LOG.info("trySendErrorReport encountered RemoteException "
|
||||
+ "errorMessage: " + errorMessage + " errorCode: " + errorCode, re);
|
||||
} catch(IOException e) {
|
||||
throw new BPServiceActorActionException("Error reporting "
|
||||
+ "an error to namenode: ");
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
|
||||
/**
|
||||
* ReportBadBlockAction is an instruction issued by {@link BPOfferService} to
|
||||
|
@ -59,6 +60,9 @@ public class ReportBadBlockAction implements BPServiceActorAction {
|
|||
|
||||
try {
|
||||
bpNamenode.reportBadBlocks(locatedBlock);
|
||||
} catch (RemoteException re) {
|
||||
DataNode.LOG.info("reportBadBlock encountered RemoteException for "
|
||||
+ "block: " + block , re);
|
||||
} catch (IOException e) {
|
||||
throw new BPServiceActorActionException("Failed to report bad block "
|
||||
+ block + " to namenode: ");
|
||||
|
|
|
@ -55,6 +55,9 @@ import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
|
|||
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
||||
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
import org.apache.hadoop.ipc.StandbyException;
|
||||
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.test.PathUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
@ -621,4 +624,55 @@ public class TestBPOfferService {
|
|||
bpos.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This test case verifies that the reportBadBlock request is not re-added to
|
||||
* {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws
|
||||
* {@link StandbyException}
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testReportBadBlocksWhenNNThrowsStandbyException()
|
||||
throws Exception {
|
||||
BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
|
||||
bpos.start();
|
||||
try {
|
||||
waitForInitialization(bpos);
|
||||
// Should start with neither NN as active.
|
||||
assertNull(bpos.getActiveNN());
|
||||
// Have NN1 claim active at txid 1
|
||||
mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
|
||||
bpos.triggerHeartbeatForTests();
|
||||
// Now mockNN1 is acting like active namenode and mockNN2 as Standby
|
||||
assertSame(mockNN1, bpos.getActiveNN());
|
||||
// Do nothing when reportBadBlocks is called on the Active Namenode
|
||||
Mockito.doNothing().when(mockNN1).reportBadBlocks
|
||||
(Mockito.any(LocatedBlock[].class));
|
||||
|
||||
RemoteException re = new RemoteException(StandbyException.class.
|
||||
getName(), "Operation category WRITE is not supported in state "
|
||||
+ "standby", RpcErrorCodeProto.ERROR_APPLICATION);
|
||||
// Throw StandbyException wrapped in RemoteException when reportBadBlocks
|
||||
// is called on the Standby NN
|
||||
Mockito.doThrow(re).when(mockNN2).reportBadBlocks
|
||||
(Mockito.any(LocatedBlock[].class));
|
||||
|
||||
bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK)
|
||||
.getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK)
|
||||
.getStorageType());
|
||||
// Send heartbeat so that the BpServiceActor can report bad block to
|
||||
// namenode
|
||||
bpos.triggerHeartbeatForTests();
|
||||
Mockito.verify(mockNN2, Mockito.times(1))
|
||||
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
|
||||
|
||||
// Trigger another heartbeat, this will send reportBadBlock again if it
|
||||
// is present in the queue.
|
||||
bpos.triggerHeartbeatForTests();
|
||||
Mockito.verify(mockNN2, Mockito.times(1))
|
||||
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
|
||||
} finally {
|
||||
bpos.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue