HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah.

This commit is contained in:
Kihwal Lee 2015-05-11 14:30:35 -05:00
parent cbea5d2db4
commit ea11590aad
4 changed files with 65 additions and 0 deletions

View File

@ -814,6 +814,9 @@ Release 2.7.1 - UNRELEASED
HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block
request is in edit log. (Rushabh S Shah via kihwal)
HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor
goes for infinite loop (Rushabh S Shah via kihwal)
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.ipc.RemoteException;
/**
@ -43,6 +44,9 @@ public class ErrorReportAction implements BPServiceActorAction {
DatanodeRegistration bpRegistration) throws BPServiceActorActionException {
try {
bpNamenode.errorReport(bpRegistration, errorCode, errorMessage);
} catch (RemoteException re) {
DataNode.LOG.info("trySendErrorReport encountered RemoteException "
+ "errorMessage: " + errorMessage + " errorCode: " + errorCode, re);
} catch(IOException e) {
throw new BPServiceActorActionException("Error reporting "
+ "an error to namenode: ");

View File

@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.ipc.RemoteException;
/**
* ReportBadBlockAction is an instruction issued by {{BPOfferService}} to
@ -59,6 +60,9 @@ public class ReportBadBlockAction implements BPServiceActorAction {
try {
bpNamenode.reportBadBlocks(locatedBlock);
} catch (RemoteException re) {
DataNode.LOG.info("reportBadBlock encountered RemoteException for "
+ "block: " + block , re);
} catch (IOException e) {
throw new BPServiceActorActionException("Failed to report bad block "
+ block + " to namenode: ");

View File

@ -55,6 +55,9 @@ import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.hadoop.util.Time;
@ -621,4 +624,55 @@ public class TestBPOfferService {
bpos.stop();
}
}
/**
* This test case doesn't add the reportBadBlock request to
* {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws
* {@link StandbyException}
* @throws Exception
*/
@Test
public void testReportBadBlocksWhenNNThrowsStandbyException()
throws Exception {
BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
bpos.start();
try {
waitForInitialization(bpos);
// Should start with neither NN as active.
assertNull(bpos.getActiveNN());
// Have NN1 claim active at txid 1
mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
bpos.triggerHeartbeatForTests();
// Now mockNN1 is acting like active namenode and mockNN2 as Standby
assertSame(mockNN1, bpos.getActiveNN());
// Return nothing when active Active Namenode calls reportBadBlocks
Mockito.doNothing().when(mockNN1).reportBadBlocks
(Mockito.any(LocatedBlock[].class));
RemoteException re = new RemoteException(StandbyException.class.
getName(), "Operation category WRITE is not supported in state "
+ "standby", RpcErrorCodeProto.ERROR_APPLICATION);
// Return StandbyException wrapped in RemoteException when Standby NN
// calls reportBadBlocks
Mockito.doThrow(re).when(mockNN2).reportBadBlocks
(Mockito.any(LocatedBlock[].class));
bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK)
.getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK)
.getStorageType());
// Send heartbeat so that the BpServiceActor can report bad block to
// namenode
bpos.triggerHeartbeatForTests();
Mockito.verify(mockNN2, Mockito.times(1))
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
// Trigger another heartbeat, this will send reportBadBlock again if it
// is present in the queue.
bpos.triggerHeartbeatForTests();
Mockito.verify(mockNN2, Mockito.times(1))
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
} finally {
bpos.stop();
}
}
}