From 49d7bb6a92026ab34eb1a9f06c059b0f98c2698b Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Fri, 4 May 2018 12:22:12 -0700 Subject: [PATCH] HDFS-13286. [SBN read] Add haadmin commands to transition between standby and observer. Contributed by Chao Sun. --- .../apache/hadoop/ha/FailoverController.java | 2 +- .../java/org/apache/hadoop/ha/HAAdmin.java | 42 ++++++++++++++++++ .../apache/hadoop/ha/HAServiceProtocol.java | 18 ++++++++ .../hadoop/ha/HAServiceProtocolHelper.java | 9 ++++ .../org/apache/hadoop/ha/HAServiceTarget.java | 7 +++ ...ServiceProtocolClientSideTranslatorPB.java | 16 +++++++ ...ServiceProtocolServerSideTranslatorPB.java | 20 +++++++++ .../src/main/proto/HAServiceProtocol.proto | 20 +++++++++ .../org/apache/hadoop/ha/DummyHAService.java | 18 +++++++- .../org/apache/hadoop/ha/MiniZKFCCluster.java | 4 ++ .../FederationNamenodeServiceState.java | 3 ++ .../hadoop/hdfs/protocolPB/PBHelper.java | 4 ++ .../hdfs/server/datanode/BPServiceActor.java | 2 +- .../hdfs/server/namenode/FSNamesystem.java | 3 +- .../hadoop/hdfs/server/namenode/NameNode.java | 29 +++++++----- .../server/namenode/NameNodeRpcServer.java | 8 ++++ .../hdfs/server/namenode/ha/StandbyState.java | 12 +++-- .../hadoop/hdfs/tools/NNHAServiceTarget.java | 5 +++ .../src/main/proto/HdfsServer.proto | 1 + .../hadoop/hdfs/tools/TestDFSHAAdmin.java | 6 +++ .../hdfs/tools/TestDFSHAAdminMiniCluster.java | 44 +++++++++++++++++++ .../server/resourcemanager/AdminService.java | 7 +++ 22 files changed, 259 insertions(+), 21 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index b86ae29af2e..4fc52d557cf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -129,7 +129,7 @@ public class FailoverController { if (!toSvcStatus.getState().equals(HAServiceState.STANDBY)) { throw new FailoverFailedException( - "Can't failover to an active service"); + "Can't failover to an " + toSvcStatus.getState() + " service"); } if (!toSvcStatus.isReadyToBecomeActive()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index e955979acde..59e20d75bc1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -72,6 +72,9 @@ public abstract class HAAdmin extends Configured implements Tool { new UsageInfo("[--"+FORCEACTIVE+"] ", "Transitions the service into Active state")) .put("-transitionToStandby", new UsageInfo("", "Transitions the service into Standby state")) + .put("-transitionToObserver", + new UsageInfo("", + "Transitions the service into Observer state")) .put("-failover", new UsageInfo("[--"+FORCEFENCE+"] [--"+FORCEACTIVE+"] ", "Failover from the first service to the second.\n" + @@ -221,6 +224,28 @@ public abstract class HAAdmin extends Configured implements Tool { HAServiceProtocolHelper.transitionToStandby(proto, createReqInfo()); return 0; } + + private int transitionToObserver(final CommandLine cmd) + throws IOException, ServiceFailedException { + String[] argv = cmd.getArgs(); + if (argv.length != 1) { + errOut.println("transitionToObserver: incorrect number of arguments"); + printUsage(errOut, "-transitionToObserver"); + return -1; + } + + HAServiceTarget target = resolveTarget(argv[0]); + if (!checkSupportObserver(target)) { + return -1; + } + if (!checkManualStateManagementOK(target)) { + return -1; + } + HAServiceProtocol proto = target.getProxy(getConf(), 0); + HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo()); + return 0; + } + /** * Ensure that we are allowed to manually manage the HA state of the target * service. If automatic failover is configured, then the automatic @@ -249,6 +274,21 @@ public abstract class HAAdmin extends Configured implements Tool { return true; } + /** + * Check if the target supports the Observer state. + * @param target the target to check + * @return true if the target support Observer state, false otherwise. + */ + private boolean checkSupportObserver(HAServiceTarget target) { + if (target.supportObserver()) { + return true; + } else { + errOut.println( + "The target " + target + " doesn't support Observer state."); + return false; + } + } + private StateChangeRequestInfo createReqInfo() { return new StateChangeRequestInfo(requestSource); } @@ -461,6 +501,8 @@ public abstract class HAAdmin extends Configured implements Tool { return transitionToActive(cmdLine); } else if ("-transitionToStandby".equals(cmd)) { return transitionToStandby(cmdLine); + } else if ("-transitionToObserver".equals(cmd)) { + return transitionToObserver(cmdLine); } else if ("-failover".equals(cmd)) { return failover(cmdLine); } else if ("-getServiceState".equals(cmd)) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 7099de8d22c..74a3d121a1a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -51,6 +51,7 @@ public interface HAServiceProtocol { INITIALIZING("initializing"), ACTIVE("active"), STANDBY("standby"), + OBSERVER("observer"), STOPPING("stopping"); private String name; @@ -148,6 +149,23 @@ public interface HAServiceProtocol { AccessControlException, IOException; + /** + * Request service to transition to observer state. No operation, if the + * service is already in observer state. + * + * @throws ServiceFailedException + * if transition from standby to observer fails. + * @throws AccessControlException + * if access is denied. + * @throws IOException + * if other errors happen + */ + @Idempotent + void transitionToObserver(StateChangeRequestInfo reqInfo) + throws ServiceFailedException, + AccessControlException, + IOException; + /** * Return the current status of the service. The status indicates * the current state (e.g ACTIVE/STANDBY) as well as diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java index 58d4a7f4af5..a2441fb3d06 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java @@ -60,4 +60,13 @@ public class HAServiceProtocolHelper { throw e.unwrapRemoteException(ServiceFailedException.class); } } + + public static void transitionToObserver(HAServiceProtocol svc, + StateChangeRequestInfo reqInfo) throws IOException { + try { + svc.transitionToObserver(reqInfo); + } catch (RemoteException e) { + throw e.unwrapRemoteException(ServiceFailedException.class); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java index 98aab99854c..4a2a21bafb0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java @@ -170,4 +170,11 @@ public abstract class HAServiceTarget { public boolean isAutoFailoverEnabled() { return false; } + + /** + * @return true if this target supports the Observer state, false otherwise. + */ + public boolean supportObserver() { + return false; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java index 589ccd142df..fec519f3761 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java @@ -36,6 +36,7 @@ import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToObserverRequestProto; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolTranslator; @@ -115,6 +116,19 @@ public class HAServiceProtocolClientSideTranslatorPB implements } } + @Override + public void transitionToObserver(StateChangeRequestInfo reqInfo) + throws IOException { + try { + TransitionToObserverRequestProto req = + TransitionToObserverRequestProto.newBuilder() + .setReqInfo(convert(reqInfo)).build(); + rpcProxy.transitionToObserver(NULL_CONTROLLER, req); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + @Override public HAServiceStatus getServiceStatus() throws IOException { GetServiceStatusResponseProto status; @@ -141,6 +155,8 @@ public class HAServiceProtocolClientSideTranslatorPB implements return HAServiceState.ACTIVE; case STANDBY: return HAServiceState.STANDBY; + case OBSERVER: + return HAServiceState.OBSERVER; case INITIALIZING: default: return HAServiceState.INITIALIZING; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java index 7f755825e97..72787cfe993 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java @@ -35,6 +35,8 @@ import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequ import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveResponseProto; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyResponseProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToObserverRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToObserverResponseProto; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; @@ -61,6 +63,9 @@ public class HAServiceProtocolServerSideTranslatorPB implements TransitionToActiveResponseProto.newBuilder().build(); private static final TransitionToStandbyResponseProto TRANSITION_TO_STANDBY_RESP = TransitionToStandbyResponseProto.newBuilder().build(); + private static final TransitionToObserverResponseProto + TRANSITION_TO_OBSERVER_RESP = + TransitionToObserverResponseProto.newBuilder().build(); private static final Logger LOG = LoggerFactory.getLogger( HAServiceProtocolServerSideTranslatorPB.class); @@ -123,6 +128,18 @@ public class HAServiceProtocolServerSideTranslatorPB implements } } + @Override + public TransitionToObserverResponseProto transitionToObserver( + RpcController controller, TransitionToObserverRequestProto request) + throws ServiceException { + try { + server.transitionToObserver(convert(request.getReqInfo())); + return TRANSITION_TO_OBSERVER_RESP; + } catch (IOException e) { + throw new ServiceException(e); + } + } + @Override public GetServiceStatusResponseProto getServiceStatus(RpcController controller, GetServiceStatusRequestProto request) throws ServiceException { @@ -141,6 +158,9 @@ public class HAServiceProtocolServerSideTranslatorPB implements case STANDBY: retState = HAServiceStateProto.STANDBY; break; + case OBSERVER: + retState = HAServiceStateProto.OBSERVER; + break; case INITIALIZING: default: retState = HAServiceStateProto.INITIALIZING; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto index e0060f25041..16ee9a2e0a5 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto @@ -32,6 +32,7 @@ enum HAServiceStateProto { INITIALIZING = 0; ACTIVE = 1; STANDBY = 2; + OBSERVER = 3; } enum HARequestSource { @@ -82,6 +83,19 @@ message TransitionToStandbyRequestProto { message TransitionToStandbyResponseProto { } +/** + * void request + */ +message TransitionToObserverRequestProto { + required HAStateChangeRequestInfoProto reqInfo = 1; +} + +/** + * void response + */ +message TransitionToObserverResponseProto { +} + /** * void request */ @@ -126,6 +140,12 @@ service HAServiceProtocolService { rpc transitionToStandby(TransitionToStandbyRequestProto) returns(TransitionToStandbyResponseProto); + /** + * Request service to transition to observer state. + */ + rpc transitionToObserver(TransitionToObserverRequestProto) + returns(TransitionToObserverResponseProto); + /** * Get the current status of the service. */ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java index 6f01be89cde..51112bedefa 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java @@ -56,7 +56,8 @@ class DummyHAService extends HAServiceTarget { InetSocketAddress address, healthMonitorAddress; boolean isHealthy = true; boolean actUnreachable = false; - boolean failToBecomeActive, failToBecomeStandby, failToFence; + boolean failToBecomeActive, failToBecomeStandby, failToBecomeObserver, + failToFence; DummySharedResource sharedResource; public int fenceCount = 0; @@ -216,6 +217,11 @@ class DummyHAService extends HAServiceTarget { return true; } + @Override + public boolean supportObserver() { + return true; + } + @Override public String toString() { return "DummyHAService #" + index; @@ -263,6 +269,16 @@ class DummyHAService extends HAServiceTarget { state = HAServiceState.STANDBY; } + @Override + public void transitionToObserver(StateChangeRequestInfo req) + throws ServiceFailedException, AccessControlException, IOException { + checkUnreachable(); + if (failToBecomeObserver) { + throw new ServiceFailedException("injected failure"); + } + state = HAServiceState.OBSERVER; + } + @Override public HAServiceStatus getServiceStatus() throws IOException { checkUnreachable(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java index 9146e01e6e1..f63d267f291 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java @@ -187,6 +187,10 @@ public class MiniZKFCCluster { svcs.get(idx).actUnreachable = unreachable; } + public void setFailToBecomeObserver(int idx, boolean doFail) { + svcs.get(idx).failToBecomeObserver = doFail; + } + /** * Wait for the given HA service to enter the given HA state. * This is based on the state of ZKFC, not the state of HA service. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamenodeServiceState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamenodeServiceState.java index 7907e3073e9..ed8f8c04562 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamenodeServiceState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamenodeServiceState.java @@ -35,6 +35,9 @@ public enum FederationNamenodeServiceState { case ACTIVE: return FederationNamenodeServiceState.ACTIVE; case STANDBY: + // TODO: we should probably have a separate state OBSERVER for RBF and + // treat it differently. + case OBSERVER: return FederationNamenodeServiceState.STANDBY; case INITIALIZING: return FederationNamenodeServiceState.UNAVAILABLE; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index ac013487a9b..baec6fafbe6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -779,6 +779,8 @@ public class PBHelper { return HAServiceState.ACTIVE; case STANDBY: return HAServiceState.STANDBY; + case OBSERVER: + return HAServiceState.OBSERVER; default: throw new IllegalArgumentException("Unexpected HAServiceStateProto:" + s); @@ -794,6 +796,8 @@ public class PBHelper { return NNHAStatusHeartbeatProto.State.ACTIVE; case STANDBY: return NNHAStatusHeartbeatProto.State.STANDBY; + case OBSERVER: + return NNHAStatusHeartbeatProto.State.OBSERVER; default: throw new IllegalArgumentException("Unexpected HAServiceState:" + s); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index aa614c98340..93af5ec6efc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -912,7 +912,7 @@ class BPServiceActor implements Runnable { scheduler.scheduleHeartbeat(); // HDFS-9917,Standby NN IBR can be very huge if standby namenode is down // for sometime. - if (state == HAServiceState.STANDBY) { + if (state == HAServiceState.STANDBY || state == HAServiceState.OBSERVER) { ibrManager.clearIBRs(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 42259c885b9..63eed2da1bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1729,7 +1729,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return haEnabled; } - return HAServiceState.STANDBY == haContext.getState().getServiceState(); + return HAServiceState.STANDBY == haContext.getState().getServiceState() || + HAServiceState.OBSERVER == haContext.getState().getServiceState(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index f98ae805385..7d354b2597b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -1763,27 +1763,37 @@ public class NameNode extends ReconfigurableBase implements throw new ServiceFailedException("HA for namenode is not enabled"); } if (state == OBSERVER_STATE) { - // TODO: we may need to remove this when enabling failover for observer throw new ServiceFailedException( - "Cannot transition from Observer to Active"); + "Cannot transition from '" + OBSERVER_STATE + "' to '" + + ACTIVE_STATE + "'"); } state.setState(haContext, ACTIVE_STATE); } - - synchronized void transitionToStandby() + + synchronized void transitionToStandby() throws ServiceFailedException, AccessControlException { namesystem.checkSuperuserPrivilege(); if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } - if (state == OBSERVER_STATE) { - // TODO: we may need to remove this when enabling failover for observer - throw new ServiceFailedException( - "Cannot transition from Observer to Standby"); - } state.setState(haContext, STANDBY_STATE); } + synchronized void transitionToObserver() + throws ServiceFailedException, AccessControlException { + namesystem.checkSuperuserPrivilege(); + if (!haEnabled) { + throw new ServiceFailedException("HA for namenode is not enabled"); + } + // Transition from ACTIVE to OBSERVER is forbidden. + if (state == ACTIVE_STATE) { + throw new ServiceFailedException( + "Cannot transition from '" + ACTIVE_STATE + "' to '" + + OBSERVER_STATE + "'"); + } + state.setState(haContext, OBSERVER_STATE); + } + synchronized HAServiceStatus getServiceStatus() throws ServiceFailedException, AccessControlException { if (!haEnabled) { @@ -1835,7 +1845,6 @@ public class NameNode extends ReconfigurableBase implements @Override // NameNodeStatusMXBean public String getState() { - // TODO: maybe we should return a different result for observer namenode? String servStateStr = ""; HAServiceState servState = getServiceState(); if (null != servState) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index a39bbe2ed97..e8cec396382 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1755,6 +1755,14 @@ public class NameNodeRpcServer implements NamenodeProtocols { nn.transitionToStandby(); } + @Override // HAServiceProtocol + public synchronized void transitionToObserver(StateChangeRequestInfo req) + throws ServiceFailedException, AccessControlException, IOException { + checkNNStartup(); + nn.checkHaStateChange(req); + nn.transitionToObserver(); + } + @Override // HAServiceProtocol public synchronized HAServiceStatus getServiceStatus() throws AccessControlException, ServiceFailedException, IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index 9a218881589..ac3e7f703c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -39,6 +39,7 @@ import org.apache.hadoop.ipc.StandbyException; */ @InterfaceAudience.Private public class StandbyState extends HAState { + // TODO: consider implementing a ObserverState instead of using the flag. private final boolean isObserver; public StandbyState() { @@ -46,21 +47,18 @@ public class StandbyState extends HAState { } public StandbyState(boolean isObserver) { - super(HAServiceState.STANDBY); + super(isObserver ? HAServiceState.OBSERVER : HAServiceState.STANDBY); this.isObserver = isObserver; } @Override public void setState(HAContext context, HAState s) throws ServiceFailedException { - if (s == NameNode.ACTIVE_STATE) { + if (s == NameNode.ACTIVE_STATE || + (!isObserver && s == NameNode.OBSERVER_STATE) || + (isObserver && s == NameNode.STANDBY_STATE)) { setStateInternal(context, s); return; } - if (isObserver && s == NameNode.STANDBY_STATE) { - // To guard against the exception in the following super call. - // The other case, standby -> observer, should not happen. - return; - } super.setState(context, s); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java index a598c3ddbf7..c4527e59b28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java @@ -186,4 +186,9 @@ public class NNHAServiceTarget extends HAServiceTarget { public boolean isAutoFailoverEnabled() { return autoFailoverEnabled; } + + @Override + public boolean supportObserver() { + return true; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto index e50883a11f2..85cfb6c0d2e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto @@ -212,6 +212,7 @@ message NNHAStatusHeartbeatProto { enum State { ACTIVE = 0; STANDBY = 1; + OBSERVER = 2; } required State state = 1; required uint64 txid = 2; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java index 528ac4b5dfa..dda6f6c2489 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java @@ -275,6 +275,12 @@ public class TestDFSHAAdmin { Mockito.verify(mockProtocol).transitionToStandby(anyReqInfo()); } + @Test + public void testTransitionToObserver() throws Exception { + assertEquals(0, runTool("-transitionToObserver", "nn1")); + Mockito.verify(mockProtocol).transitionToObserver(anyReqInfo()); + } + @Test public void testFailoverWithNoFencerConfigured() throws Exception { Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java index dff98663c7b..fc569d0aa7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java @@ -116,6 +116,50 @@ public class TestDFSHAAdminMiniCluster { assertFalse(nnode2.isStandbyState()); assertEquals(0, runTool("-transitionToStandby", "nn2")); assertTrue(nnode2.isStandbyState()); + assertEquals(0, runTool("-transitionToObserver", "nn2")); + assertFalse(nnode2.isStandbyState()); + assertTrue(nnode2.isObserverState()); + } + + @Test + public void testObserverTransition() throws Exception { + NameNode nnode1 = cluster.getNameNode(0); + assertTrue(nnode1.isStandbyState()); + + // Should be able to transition from STANDBY to OBSERVER + assertEquals(0, runTool("-transitionToObserver", "nn1")); + assertFalse(nnode1.isStandbyState()); + assertTrue(nnode1.isObserverState()); + + // Transition from Observer to Observer should be no-op + assertEquals(0, runTool("-transitionToObserver", "nn1")); + assertTrue(nnode1.isObserverState()); + + // Should also be able to transition back from OBSERVER to STANDBY + assertEquals(0, runTool("-transitionToStandby", "nn1")); + assertTrue(nnode1.isStandbyState()); + assertFalse(nnode1.isObserverState()); + } + + @Test + public void testObserverIllegalTransition() throws Exception { + NameNode nnode1 = cluster.getNameNode(0); + assertTrue(nnode1.isStandbyState()); + assertEquals(0, runTool("-transitionToActive", "nn1")); + assertFalse(nnode1.isStandbyState()); + assertTrue(nnode1.isActiveState()); + + // Should NOT be able to transition from ACTIVE to OBSERVER + assertEquals(-1, runTool("-transitionToObserver", "nn1")); + assertTrue(nnode1.isActiveState()); + + // Should NOT be able to transition from OBSERVER to ACTIVE + assertEquals(0, runTool("-transitionToStandby", "nn1")); + assertTrue(nnode1.isStandbyState()); + assertEquals(0, runTool("-transitionToObserver", "nn1")); + assertTrue(nnode1.isObserverState()); + assertEquals(-1, runTool("-transitionToActive", "nn1")); + assertFalse(nnode1.isActiveState()); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 880741a8755..e97a13b1752 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -362,6 +362,13 @@ public class AdminService extends CompositeService implements } } + @Override + public synchronized void transitionToObserver( + StateChangeRequestInfo reqInfo) throws IOException { + // Should NOT get here, as RMHAServiceTarget doesn't support observer. + throw new ServiceFailedException("Does not support transition to Observer"); + } + /** * Return the HA status of this RM. This includes the current state and * whether the RM is ready to become active.