HDFS-3071. haadmin failover command does not provide enough detail when target NN is not ready to be active. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1304204 13f79535-47bb-0310-9956-ffa450edef68
Author: Todd Lipcon
Date:   2012-03-23 06:29:48 +00:00
commit 13e2ed8c65
parent 2df39f259b
16 changed files with 240 additions and 165 deletions
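In short, the patch folds the old getServiceState()/readyToBecomeActive() pair into a single getServiceStatus() call whose HAServiceStatus result also carries a human-readable not-ready reason, so the haadmin failover error can report why the target NameNode cannot become active. A minimal caller-side sketch of that check (illustrative only, simplified from the FailoverController hunk below; the helper class and method name are made up):

    import java.io.IOException;
    import java.net.InetSocketAddress;

    import org.apache.hadoop.ha.FailoverFailedException;
    import org.apache.hadoop.ha.HAServiceProtocol;
    import org.apache.hadoop.ha.HAServiceStatus;

    // Illustrative helper, not part of the patch.
    class ReadinessCheckSketch {
      static void checkReadyToFailover(HAServiceProtocol toSvc,
          InetSocketAddress toSvcAddr, boolean forceActive)
          throws IOException, FailoverFailedException {
        // One call now returns state, readiness, and the reason it is not ready.
        HAServiceStatus status = toSvc.getServiceStatus();
        if (!status.isReadyToBecomeActive() && !forceActive) {
          // Surface the reason (e.g. "The NameNode is in safemode. ...") to haadmin.
          throw new FailoverFailedException(
              toSvcAddr + " is not ready to become active: " +
              status.getNotReadyReason());
        }
      }
    }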


@@ -60,20 +60,32 @@ public class FailoverController {
                                         InetSocketAddress toSvcAddr,
                                         boolean forceActive)
       throws FailoverFailedException {
-    HAServiceState toSvcState;
+    HAServiceStatus toSvcStatus;
     try {
-      toSvcState = toSvc.getServiceState();
+      toSvcStatus = toSvc.getServiceStatus();
     } catch (IOException e) {
       String msg = "Unable to get service state for " + toSvcAddr;
       LOG.error(msg, e);
       throw new FailoverFailedException(msg, e);
     }

-    if (!toSvcState.equals(HAServiceState.STANDBY)) {
+    if (!toSvcStatus.getState().equals(HAServiceState.STANDBY)) {
       throw new FailoverFailedException(
           "Can't failover to an active service");
     }
+
+    if (!toSvcStatus.isReadyToBecomeActive()) {
+      String notReadyReason = toSvcStatus.getNotReadyReason();
+      if (!forceActive) {
+        throw new FailoverFailedException(
+            toSvcAddr + " is not ready to become active: " +
+            notReadyReason);
+      } else {
+        LOG.warn("Service is not ready to become active, but forcing: " +
+            notReadyReason);
+      }
+    }

     try {
       HAServiceProtocolHelper.monitorHealth(toSvc);
@@ -84,18 +96,6 @@ public class FailoverController {
       throw new FailoverFailedException(
           "Got an IO exception", e);
     }
-
-    try {
-      if (!toSvc.readyToBecomeActive()) {
-        if (!forceActive) {
-          throw new FailoverFailedException(
-              toSvcAddr + " is not ready to become active");
-        }
-      }
-    } catch (IOException e) {
-      throw new FailoverFailedException(
-          "Got an IO exception", e);
-    }
   }

   /**


@@ -32,7 +32,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB;
-import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -221,7 +220,7 @@ public abstract class HAAdmin extends Configured implements Tool {
     }

     HAServiceProtocol proto = getProtocol(argv[1]);
-    out.println(proto.getServiceState());
+    out.println(proto.getServiceStatus().getState());
     return 0;
   }


@@ -115,27 +115,15 @@ public interface HAServiceProtocol {
       IOException;

   /**
-   * Return the current state of the service.
+   * Return the current status of the service. The status indicates
+   * the current <em>state</em> (e.g ACTIVE/STANDBY) as well as
+   * some additional information. {@see HAServiceStatus}
    *
    * @throws AccessControlException
    *           if access is denied.
    * @throws IOException
    *           if other errors happen
    */
-  public HAServiceState getServiceState() throws AccessControlException,
+  public HAServiceStatus getServiceStatus() throws AccessControlException,
                                                  IOException;
-
-  /**
-   * Return true if the service is capable and ready to transition
-   * from the standby state to the active state.
-   *
-   * @return true if the service is ready to become active, false otherwise.
-   * @throws AccessControlException
-   *           if access is denied.
-   * @throws IOException
-   *           if other errors happen
-   */
-  public boolean readyToBecomeActive() throws ServiceFailedException,
-                                              AccessControlException,
-                                              IOException;
 }


@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ha;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+
+@InterfaceAudience.Private
+public class HAServiceStatus {
+  private HAServiceState state;
+  private boolean readyToBecomeActive;
+  private String notReadyReason;
+
+  public HAServiceStatus(HAServiceState state) {
+    this.state = state;
+  }
+
+  public HAServiceState getState() {
+    return state;
+  }
+
+  public HAServiceStatus setReadyToBecomeActive() {
+    this.readyToBecomeActive = true;
+    this.notReadyReason = null;
+    return this;
+  }
+
+  public HAServiceStatus setNotReadyToBecomeActive(String reason) {
+    this.readyToBecomeActive = false;
+    this.notReadyReason = reason;
+    return this;
+  }
+
+  public boolean isReadyToBecomeActive() {
+    return readyToBecomeActive;
+  }
+
+  public String getNotReadyReason() {
+    return notReadyReason;
+  }
+}
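The new class is a simple value object with fluent setters. A small usage sketch (illustrative only; the helper class below is not part of the patch) showing how a standby service might populate it, mirroring what NameNode#getServiceStatus does later in this commit:

    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
    import org.apache.hadoop.ha.HAServiceStatus;

    // Illustrative helper, not part of the patch.
    class StatusExample {
      static HAServiceStatus standbyStatus(boolean inSafeMode, String safeModeTip) {
        HAServiceStatus status = new HAServiceStatus(HAServiceState.STANDBY);
        if (inSafeMode) {
          // Not ready: carry a human-readable reason back to the haadmin client.
          return status.setNotReadyToBecomeActive(
              "The NameNode is in safemode. " + safeModeTip);
        }
        return status.setReadyToBecomeActive();
      }
    }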


@@ -77,7 +77,8 @@ class HealthMonitor {
   private List<Callback> callbacks = Collections.synchronizedList(
       new LinkedList<Callback>());

-  private HAServiceState lastServiceState = HAServiceState.INITIALIZING;
+  private HAServiceStatus lastServiceState = new HAServiceStatus(
+      HAServiceState.INITIALIZING);

   enum State {
     /**
@@ -188,10 +189,10 @@ class HealthMonitor {
   private void doHealthChecks() throws InterruptedException {
     while (shouldRun) {
-      HAServiceState state = null;
+      HAServiceStatus status = null;
       boolean healthy = false;
       try {
-        state = proxy.getServiceState();
+        status = proxy.getServiceStatus();
         proxy.monitorHealth();
         healthy = true;
       } catch (HealthCheckFailedException e) {
@@ -207,8 +208,8 @@ class HealthMonitor {
         return;
       }

-      if (state != null) {
-        setLastServiceState(state);
+      if (status != null) {
+        setLastServiceStatus(status);
       }
       if (healthy) {
         enterState(State.SERVICE_HEALTHY);
@@ -218,8 +219,8 @@ class HealthMonitor {
     }
   }

-  private synchronized void setLastServiceState(HAServiceState serviceState) {
-    this.lastServiceState = serviceState;
+  private synchronized void setLastServiceStatus(HAServiceStatus status) {
+    this.lastServiceState = status;
   }

   private synchronized void enterState(State newState) {
@@ -238,7 +239,7 @@ class HealthMonitor {
     return state;
   }

-  synchronized HAServiceState getLastServiceState() {
+  synchronized HAServiceStatus getLastServiceStatus() {
     return lastServiceState;
   }


@@ -27,10 +27,11 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ha.HAServiceProtocol;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateRequestProto;
+import org.apache.hadoop.ha.HAServiceStatus;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusRequestProto;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto;
 import org.apache.hadoop.ipc.ProtobufHelper;
@@ -60,10 +61,8 @@ public class HAServiceProtocolClientSideTranslatorPB implements
       TransitionToActiveRequestProto.newBuilder().build();
   private final static TransitionToStandbyRequestProto TRANSITION_TO_STANDBY_REQ =
       TransitionToStandbyRequestProto.newBuilder().build();
-  private final static GetServiceStateRequestProto GET_SERVICE_STATE_REQ =
-      GetServiceStateRequestProto.newBuilder().build();
-  private final static ReadyToBecomeActiveRequestProto ACTIVE_READY_REQ =
-      ReadyToBecomeActiveRequestProto.newBuilder().build();
+  private final static GetServiceStatusRequestProto GET_SERVICE_STATUS_REQ =
+      GetServiceStatusRequestProto.newBuilder().build();

   private final HAServiceProtocolPB rpcProxy;
@@ -113,14 +112,26 @@ public class HAServiceProtocolClientSideTranslatorPB implements
   }

   @Override
-  public HAServiceState getServiceState() throws IOException {
-    HAServiceStateProto state;
+  public HAServiceStatus getServiceStatus() throws IOException {
+    GetServiceStatusResponseProto status;
     try {
-      state = rpcProxy.getServiceState(NULL_CONTROLLER,
-          GET_SERVICE_STATE_REQ).getState();
+      status = rpcProxy.getServiceStatus(NULL_CONTROLLER,
+          GET_SERVICE_STATUS_REQ);
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
+
+    HAServiceStatus ret = new HAServiceStatus(
+        convert(status.getState()));
+    if (status.getReadyToBecomeActive()) {
+      ret.setReadyToBecomeActive();
+    } else {
+      ret.setNotReadyToBecomeActive(status.getNotReadyReason());
+    }
+    return ret;
+  }
+
+  private HAServiceState convert(HAServiceStateProto state) {
     switch(state) {
     case ACTIVE:
       return HAServiceState.ACTIVE;
@@ -137,16 +148,6 @@ public class HAServiceProtocolClientSideTranslatorPB implements
     RPC.stopProxy(rpcProxy);
   }

-  @Override
-  public boolean readyToBecomeActive() throws IOException {
-    try {
-      return rpcProxy.readyToBecomeActive(NULL_CONTROLLER, ACTIVE_READY_REQ)
-          .getReadyToBecomeActive();
-    } catch (ServiceException e) {
-      throw ProtobufHelper.getRemoteException(e);
-    }
-  }
-
   @Override
   public Object getUnderlyingProxyObject() {
     return rpcProxy;


@@ -22,14 +22,12 @@ import java.io.IOException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.ha.HAServiceProtocol;
-import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateRequestProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateResponseProto;
+import org.apache.hadoop.ha.HAServiceStatus;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusRequestProto;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthResponseProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveRequestProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto;
@@ -99,29 +97,37 @@ public class HAServiceProtocolServerSideTranslatorPB implements
   }

   @Override
-  public GetServiceStateResponseProto getServiceState(RpcController controller,
-      GetServiceStateRequestProto request) throws ServiceException {
-    HAServiceState s;
+  public GetServiceStatusResponseProto getServiceStatus(RpcController controller,
+      GetServiceStatusRequestProto request) throws ServiceException {
+    HAServiceStatus s;
     try {
-      s = server.getServiceState();
+      s = server.getServiceStatus();
     } catch(IOException e) {
       throw new ServiceException(e);
     }
-    HAServiceStateProto ret;
-    switch (s) {
+
+    HAServiceStateProto retState;
+    switch (s.getState()) {
     case ACTIVE:
-      ret = HAServiceStateProto.ACTIVE;
+      retState = HAServiceStateProto.ACTIVE;
       break;
     case STANDBY:
-      ret = HAServiceStateProto.STANDBY;
+      retState = HAServiceStateProto.STANDBY;
       break;
     case INITIALIZING:
     default:
-      ret = HAServiceStateProto.INITIALIZING;
+      retState = HAServiceStateProto.INITIALIZING;
       break;
     }
-    return GetServiceStateResponseProto.newBuilder().setState(ret).build();
+
+    GetServiceStatusResponseProto.Builder ret =
+        GetServiceStatusResponseProto.newBuilder()
+        .setState(retState)
+        .setReadyToBecomeActive(s.isReadyToBecomeActive());
+    if (!s.isReadyToBecomeActive()) {
+      ret.setNotReadyReason(s.getNotReadyReason());
+    }
+    return ret.build();
   }

   @Override
@@ -143,16 +149,4 @@ public class HAServiceProtocolServerSideTranslatorPB implements
         RPC.getProtocolVersion(HAServiceProtocolPB.class),
         HAServiceProtocolPB.class);
   }
-
-  @Override
-  public ReadyToBecomeActiveResponseProto readyToBecomeActive(
-      RpcController controller, ReadyToBecomeActiveRequestProto request)
-      throws ServiceException {
-    try {
-      return ReadyToBecomeActiveResponseProto.newBuilder()
-          .setReadyToBecomeActive(server.readyToBecomeActive()).build();
-    } catch (IOException e) {
-      throw new ServiceException(e);
-    }
-  }
 }


@@ -66,27 +66,20 @@ message TransitionToStandbyResponseProto {
 /**
  * void request
  */
-message GetServiceStateRequestProto {
+message GetServiceStatusRequestProto {
 }

 /**
  * Returns the state of the service
  */
-message GetServiceStateResponseProto {
+message GetServiceStatusResponseProto {
   required HAServiceStateProto state = 1;
-}
-
-/**
- * void request
- */
-message ReadyToBecomeActiveRequestProto {
-}
-
-/**
- * Returns true if service is ready to become active
- */
-message ReadyToBecomeActiveResponseProto {
-  required bool readyToBecomeActive = 1;
+
+  // If state is STANDBY, indicate whether it is
+  // ready to become active.
+  optional bool readyToBecomeActive = 2;
+  // If not ready to become active, a textual explanation of why not
+  optional string notReadyReason = 3;
 }

 /**
@@ -115,14 +108,8 @@ service HAServiceProtocolService {
       returns(TransitionToStandbyResponseProto);

   /**
-   * Get the current state of the service.
+   * Get the current status of the service.
    */
-  rpc getServiceState(GetServiceStateRequestProto)
-      returns(GetServiceStateResponseProto);
-
-  /**
-   * Check if the service is ready to become active
-   */
-  rpc readyToBecomeActive(ReadyToBecomeActiveRequestProto)
-      returns(ReadyToBecomeActiveResponseProto);
+  rpc getServiceStatus(GetServiceStatusRequestProto)
+      returns(GetServiceStatusResponseProto);
 }


@@ -30,8 +30,6 @@ import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB;
 import org.apache.hadoop.ha.TestNodeFencer.AlwaysSucceedFencer;
 import org.apache.hadoop.ha.TestNodeFencer.AlwaysFailFencer;
 import static org.apache.hadoop.ha.TestNodeFencer.setupFencer;
-import org.apache.hadoop.ipc.ProtocolSignature;
-import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.AccessControlException;
@@ -66,13 +64,16 @@ public class TestFailoverController {
     }

     @Override
-    public HAServiceState getServiceState() throws IOException {
-      return state;
+    public HAServiceStatus getServiceStatus() throws IOException {
+      HAServiceStatus ret = new HAServiceStatus(state);
+      if (state == HAServiceState.STANDBY) {
+        ret.setReadyToBecomeActive();
+      }
+      return ret;
     }

-    @Override
-    public boolean readyToBecomeActive() throws ServiceFailedException, IOException {
-      return true;
+    private HAServiceState getServiceState() {
+      return state;
     }
   }
@@ -127,13 +128,13 @@ public class TestFailoverController {
   public void testFailoverWithoutPermission() throws Exception {
     DummyService svc1 = new DummyService(HAServiceState.ACTIVE) {
       @Override
-      public HAServiceState getServiceState() throws IOException {
+      public HAServiceStatus getServiceStatus() throws IOException {
         throw new AccessControlException("Access denied");
       }
     };
     DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
       @Override
-      public HAServiceState getServiceState() throws IOException {
+      public HAServiceStatus getServiceStatus() throws IOException {
         throw new AccessControlException("Access denied");
       }
     };
@@ -153,8 +154,10 @@ public class TestFailoverController {
     DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
     DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
       @Override
-      public boolean readyToBecomeActive() throws ServiceFailedException, IOException {
-        return false;
+      public HAServiceStatus getServiceStatus() throws IOException {
+        HAServiceStatus ret = new HAServiceStatus(HAServiceState.STANDBY);
+        ret.setNotReadyToBecomeActive("injected not ready");
+        return ret;
       }
     };
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
@@ -164,6 +167,9 @@ public class TestFailoverController {
       fail("Can't failover to a service that's not ready");
     } catch (FailoverFailedException ffe) {
       // Expected
+      if (!ffe.getMessage().contains("injected not ready")) {
+        throw ffe;
+      }
     }

     assertEquals(HAServiceState.ACTIVE, svc1.getServiceState());


@@ -30,7 +30,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
-import static org.mockito.Mockito.when;

 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
@@ -46,7 +45,6 @@ public class TestHAAdmin {
   @Before
   public void setup() throws IOException {
     mockProtocol = Mockito.mock(HAServiceProtocol.class);
-    when(mockProtocol.readyToBecomeActive()).thenReturn(true);
     tool = new HAAdmin() {
       @Override
       protected HAServiceProtocol getProtocol(String target) throws IOException {


@@ -64,8 +64,8 @@ public class TestHealthMonitor {
     conf.setInt(CommonConfigurationKeys.HA_HM_CONNECT_RETRY_INTERVAL_KEY, 50);
     conf.setInt(CommonConfigurationKeys.HA_HM_SLEEP_AFTER_DISCONNECT_KEY, 50);
     mockProxy = Mockito.mock(HAServiceProtocol.class);
-    Mockito.doReturn(HAServiceState.ACTIVE)
-        .when(mockProxy).getServiceState();
+    Mockito.doReturn(new HAServiceStatus(HAServiceState.ACTIVE))
+        .when(mockProxy).getServiceStatus();

     hm = new HealthMonitor(conf, BOGUS_ADDR) {
       @Override


@@ -159,6 +159,9 @@ Release 0.23.3 - UNRELEASED
     HDFS-3044. fsck move should be non-destructive by default.
     (Colin Patrick McCabe via eli)

+    HDFS-3071. haadmin failover command does not provide enough detail when
+    target NN is not ready to be active. (todd)
+
   OPTIMIZATIONS

     HDFS-2477. Optimize computing the diff between a block report and the


@@ -32,6 +32,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.HAServiceStatus;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
@@ -990,24 +991,41 @@ public class NameNode {
     state.setState(haContext, STANDBY_STATE);
   }

-  synchronized HAServiceState getServiceState() throws AccessControlException {
+  synchronized HAServiceStatus getServiceStatus()
+      throws ServiceFailedException, AccessControlException {
     namesystem.checkSuperuserPrivilege();
+    if (!haEnabled) {
+      throw new ServiceFailedException("HA for namenode is not enabled");
+    }
+    if (state == null) {
+      return new HAServiceStatus(HAServiceState.INITIALIZING);
+    }
+    HAServiceState retState = state.getServiceState();
+    HAServiceStatus ret = new HAServiceStatus(retState);
+    if (retState == HAServiceState.STANDBY) {
+      String safemodeTip = namesystem.getSafeModeTip();
+      if (!safemodeTip.isEmpty()) {
+        ret.setNotReadyToBecomeActive(
+            "The NameNode is in safemode. " +
+            safemodeTip);
+      } else {
+        ret.setReadyToBecomeActive();
+      }
+    } else if (retState == HAServiceState.ACTIVE) {
+      ret.setReadyToBecomeActive();
+    } else {
+      ret.setNotReadyToBecomeActive("State is " + state);
+    }
+    return ret;
+  }
+
+  synchronized HAServiceState getServiceState() {
     if (state == null) {
       return HAServiceState.INITIALIZING;
     }
     return state.getServiceState();
   }

-  synchronized boolean readyToBecomeActive()
-      throws ServiceFailedException, AccessControlException {
-    namesystem.checkSuperuserPrivilege();
-    if (!haEnabled) {
-      throw new ServiceFailedException("HA for namenode is not enabled");
-    }
-    return !isInSafeMode();
-  }
-
   /**
    * Class used as expose {@link NameNode} as context to {@link HAState}
    *


@@ -41,6 +41,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.ha.HAServiceStatus;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceProtocolService;
@@ -981,15 +982,9 @@ class NameNodeRpcServer implements NamenodeProtocols {
   }

   @Override // HAServiceProtocol
-  public synchronized HAServiceState getServiceState()
-      throws AccessControlException {
-    return nn.getServiceState();
-  }
-
-  @Override // HAServiceProtocol
-  public synchronized boolean readyToBecomeActive()
-      throws ServiceFailedException, AccessControlException {
-    return nn.readyToBecomeActive();
+  public synchronized HAServiceStatus getServiceStatus()
+      throws AccessControlException, ServiceFailedException {
+    return nn.getServiceStatus();
   }

   /**


@@ -31,13 +31,13 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.ha.HAServiceProtocol;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.HAServiceStatus;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.ha.NodeFencer;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
-import static org.mockito.Mockito.when;

 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
@@ -51,6 +51,11 @@ public class TestDFSHAAdmin {
   private HAServiceProtocol mockProtocol;

   private static final String NSID = "ns1";
+
+  private static final HAServiceStatus STANDBY_READY_RESULT =
+      new HAServiceStatus(HAServiceState.STANDBY)
+      .setReadyToBecomeActive();
+
   private static String HOST_A = "1.2.3.1";
   private static String HOST_B = "1.2.3.2";
@@ -73,7 +78,6 @@ public class TestDFSHAAdmin {
   @Before
   public void setup() throws IOException {
     mockProtocol = Mockito.mock(HAServiceProtocol.class);
-    when(mockProtocol.readyToBecomeActive()).thenReturn(true);
     tool = new DFSHAAdmin() {
       @Override
       protected HAServiceProtocol getProtocol(String serviceId) throws IOException {
@@ -105,8 +109,9 @@ public class TestDFSHAAdmin {

   @Test
   public void testNamenodeResolution() throws Exception {
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(0, runTool("-getServiceState", "nn1"));
-    Mockito.verify(mockProtocol).getServiceState();
+    Mockito.verify(mockProtocol).getServiceStatus();
     assertEquals(-1, runTool("-getServiceState", "undefined"));
     assertOutputContains(
         "Unable to determine service address for namenode 'undefined'");
@@ -133,13 +138,13 @@ public class TestDFSHAAdmin {
   @Test
   public void testFailoverWithNoFencerConfigured() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(-1, runTool("-failover", "nn1", "nn2"));
   }

   @Test
   public void testFailoverWithFencerConfigured() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -148,7 +153,7 @@ public class TestDFSHAAdmin {

   @Test
   public void testFailoverWithFencerAndNameservice() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -157,7 +162,7 @@ public class TestDFSHAAdmin {

   @Test
   public void testFailoverWithFencerConfiguredAndForce() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -166,7 +171,7 @@ public class TestDFSHAAdmin {

   @Test
   public void testFailoverWithForceActive() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -175,7 +180,7 @@ public class TestDFSHAAdmin {

   @Test
   public void testFailoverWithInvalidFenceArg() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -184,13 +189,13 @@ public class TestDFSHAAdmin {

   @Test
   public void testFailoverWithFenceButNoFencer() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(-1, runTool("-failover", "nn1", "nn2", "--forcefence"));
   }

   @Test
   public void testFailoverWithFenceAndBadFencer() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "foobar!");
     tool.setConf(conf);
@@ -199,7 +204,7 @@ public class TestDFSHAAdmin {

   @Test
   public void testForceFenceOptionListedBeforeArgs() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -207,9 +212,10 @@ public class TestDFSHAAdmin {
   }

   @Test
-  public void testGetServiceState() throws Exception {
+  public void testGetServiceStatus() throws Exception {
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(0, runTool("-getServiceState", "nn1"));
-    Mockito.verify(mockProtocol).getServiceState();
+    Mockito.verify(mockProtocol).getServiceStatus();
   }

   @Test


@@ -21,6 +21,7 @@ import static org.junit.Assert.*;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;

 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.Log;
@@ -28,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.ha.NodeFencer;

 import org.junit.After;
@@ -46,7 +48,10 @@ public class TestDFSHAAdminMiniCluster {
   private MiniDFSCluster cluster;
   private Configuration conf;
   private DFSHAAdmin tool;
+  private ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream();
+
+  private String errOutput;

   @Before
   public void setup() throws IOException {
     conf = new Configuration();
@@ -55,6 +60,7 @@ public class TestDFSHAAdminMiniCluster {
         .build();
     tool = new DFSHAAdmin();
     tool.setConf(conf);
+    tool.setErrOut(new PrintStream(errOutBytes));
     cluster.waitActive();
   }
@@ -67,6 +73,12 @@ public class TestDFSHAAdminMiniCluster {
   public void testGetServiceState() throws Exception {
     assertEquals(0, runTool("-getServiceState", "nn1"));
     assertEquals(0, runTool("-getServiceState", "nn2"));
+
+    cluster.transitionToActive(0);
+    assertEquals(0, runTool("-getServiceState", "nn1"));
+
+    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
+    assertEquals(0, runTool("-getServiceState", "nn1"));
   }

   @Test
@@ -85,6 +97,18 @@ public class TestDFSHAAdminMiniCluster {
     assertEquals(0, runTool("-transitionToStandby", "nn2"));
     assertTrue(nnode2.isStandbyState());
   }
+
+  @Test
+  public void testTryFailoverToSafeMode() throws Exception {
+    conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
+    tool.setConf(conf);
+
+    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
+    assertEquals(-1, runTool("-failover", "nn2", "nn1"));
+    assertTrue("Bad output: " + errOutput,
+        errOutput.contains("is not ready to become active: " +
+            "The NameNode is in safemode"));
+  }

   /**
    * Test failover with various options
@@ -132,11 +156,10 @@ public class TestDFSHAAdminMiniCluster {
   }

   private int runTool(String ... args) throws Exception {
-    ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream();
     errOutBytes.reset();
     LOG.info("Running: DFSHAAdmin " + Joiner.on(" ").join(args));
     int ret = tool.run(args);
-    String errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8);
+    errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8);
     LOG.info("Output:\n" + errOutput);
     return ret;
   }