From 50778f9d458443c2cbfead4502df4e1204c4d567 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Mon, 11 May 2015 14:33:45 -0500 Subject: [PATCH] HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah. (cherry picked from commit ea11590aad952b5b560a5101d064adf27d8656db) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../server/datanode/ErrorReportAction.java | 4 ++ .../server/datanode/ReportBadBlockAction.java | 4 ++ .../server/datanode/TestBPOfferService.java | 54 +++++++++++++++++++ 4 files changed, 65 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 95acf8a9e60..09529e97804 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -484,6 +484,9 @@ Release 2.7.1 - UNRELEASED HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block request is in edit log. (Rushabh S Shah via kihwal) + HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor + goes for infinite loop (Rushabh S Shah via kihwal) + Release 2.7.0 - 2015-04-20 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java index 331822ac6d4..b7a9dae77a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.ipc.RemoteException; /** @@ -43,6 +44,9 @@ public class ErrorReportAction implements BPServiceActorAction { DatanodeRegistration bpRegistration) throws BPServiceActorActionException { try { bpNamenode.errorReport(bpRegistration, errorCode, errorMessage); + } catch (RemoteException re) { + DataNode.LOG.info("trySendErrorReport encountered RemoteException " + + "errorMessage: " + errorMessage + " errorCode: " + errorCode, re); } catch(IOException e) { throw new BPServiceActorActionException("Error reporting " + "an error to namenode: "); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java index 7155eae3505..671a1fea18a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.ipc.RemoteException; /** * ReportBadBlockAction is an instruction issued by {{BPOfferService}} to @@ -59,6 +60,9 @@ public class ReportBadBlockAction implements BPServiceActorAction { try { bpNamenode.reportBadBlocks(locatedBlock); + } catch (RemoteException re) { + DataNode.LOG.info("reportBadBlock encountered RemoteException for " + + "block: " + block , re); } catch (IOException e) { throw new BPServiceActorActionException("Failed to report bad block " + block + " to namenode: "); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 3aa9a7b7b69..64cc78bf218 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -55,6 +55,9 @@ import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport; import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.ipc.StandbyException; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; import org.apache.hadoop.util.Time; @@ -621,4 +624,55 @@ public class TestBPOfferService { bpos.stop(); } } + + /** + * This test case doesn't add the reportBadBlock request to + * {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws + * {@link StandbyException} + * @throws Exception + */ + @Test + public void testReportBadBlocksWhenNNThrowsStandbyException() + throws Exception { + BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2); + bpos.start(); + try { + waitForInitialization(bpos); + // Should start with neither NN as active. + assertNull(bpos.getActiveNN()); + // Have NN1 claim active at txid 1 + mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1); + bpos.triggerHeartbeatForTests(); + // Now mockNN1 is acting like active namenode and mockNN2 as Standby + assertSame(mockNN1, bpos.getActiveNN()); + // Return nothing when active Active Namenode calls reportBadBlocks + Mockito.doNothing().when(mockNN1).reportBadBlocks + (Mockito.any(LocatedBlock[].class)); + + RemoteException re = new RemoteException(StandbyException.class. + getName(), "Operation category WRITE is not supported in state " + + "standby", RpcErrorCodeProto.ERROR_APPLICATION); + // Return StandbyException wrapped in RemoteException when Standby NN + // calls reportBadBlocks + Mockito.doThrow(re).when(mockNN2).reportBadBlocks + (Mockito.any(LocatedBlock[].class)); + + bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK) + .getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK) + .getStorageType()); + // Send heartbeat so that the BpServiceActor can report bad block to + // namenode + bpos.triggerHeartbeatForTests(); + Mockito.verify(mockNN2, Mockito.times(1)) + .reportBadBlocks(Mockito.any(LocatedBlock[].class)); + + // Trigger another heartbeat, this will send reportBadBlock again if it + // is present in the queue. + bpos.triggerHeartbeatForTests(); + Mockito.verify(mockNN2, Mockito.times(1)) + .reportBadBlocks(Mockito.any(LocatedBlock[].class)); + } finally { + bpos.stop(); + } + } }