HADOOP-11000. HAServiceProtocol's health state is incorrectly transitioned to SERVICE_NOT_RESPONDING (Contributed by Ming Ma)

(cherry picked from commit cf4b7f506d)
This commit is contained in:
Vinayakumar B 2015-02-17 14:55:56 +05:30
parent 91a5d92916
commit 005e1df540
4 changed files with 94 additions and 21 deletions

View File

@ -537,6 +537,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11467. KerberosAuthenticator can connect to a non-secure cluster. HADOOP-11467. KerberosAuthenticator can connect to a non-secure cluster.
(yzhangal via rkanter) (yzhangal via rkanter)
HADOOP-11000. HAServiceProtocol's health state is incorrectly transitioned
to SERVICE_NOT_RESPONDING (Ming Ma via vinayakumarb)
Release 2.6.1 - UNRELEASED Release 2.6.1 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -30,6 +30,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.*;
import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.Daemon;
@ -201,18 +202,20 @@ public class HealthMonitor {
status = proxy.getServiceStatus(); status = proxy.getServiceStatus();
proxy.monitorHealth(); proxy.monitorHealth();
healthy = true; healthy = true;
} catch (HealthCheckFailedException e) {
LOG.warn("Service health check failed for " + targetToMonitor
+ ": " + e.getMessage());
enterState(State.SERVICE_UNHEALTHY);
} catch (Throwable t) { } catch (Throwable t) {
LOG.warn("Transport-level exception trying to monitor health of " + if (isHealthCheckFailedException(t)) {
targetToMonitor + ": " + t.getLocalizedMessage()); LOG.warn("Service health check failed for " + targetToMonitor
RPC.stopProxy(proxy); + ": " + t.getMessage());
proxy = null; enterState(State.SERVICE_UNHEALTHY);
enterState(State.SERVICE_NOT_RESPONDING); } else {
Thread.sleep(sleepAfterDisconnectMillis); LOG.warn("Transport-level exception trying to monitor health of " +
return; targetToMonitor + ": " + t.getCause() + " " + t.getLocalizedMessage());
RPC.stopProxy(proxy);
proxy = null;
enterState(State.SERVICE_NOT_RESPONDING);
Thread.sleep(sleepAfterDisconnectMillis);
return;
}
} }
if (status != null) { if (status != null) {
@ -225,7 +228,15 @@ public class HealthMonitor {
Thread.sleep(checkIntervalMillis); Thread.sleep(checkIntervalMillis);
} }
} }
private boolean isHealthCheckFailedException(Throwable t) {
return ((t instanceof HealthCheckFailedException) ||
(t instanceof RemoteException &&
((RemoteException)t).unwrapRemoteException(
HealthCheckFailedException.class) instanceof
HealthCheckFailedException));
}
private synchronized void setLastServiceStatus(HAServiceStatus status) { private synchronized void setLastServiceStatus(HAServiceStatus status) {
this.lastServiceState = status; this.lastServiceState = status;
for (ServiceStateCallback cb : serviceStateCallbacks) { for (ServiceStateCallback cb : serviceStateCallbacks) {

View File

@ -22,15 +22,25 @@ import java.io.IOException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.util.ArrayList; import java.util.ArrayList;
import com.google.protobuf.BlockingService;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.protocolPB.HAServiceProtocolPB;
import org.apache.hadoop.ha.protocolPB.HAServiceProtocolServerSideTranslatorPB;
import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceProtocolService;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.AccessControlException;
import org.mockito.Mockito; import org.mockito.Mockito;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT;
/** /**
* Test-only implementation of {@link HAServiceTarget}, which returns * Test-only implementation of {@link HAServiceTarget}, which returns
* a mock implementation. * a mock implementation.
@ -50,22 +60,33 @@ class DummyHAService extends HAServiceTarget {
DummySharedResource sharedResource; DummySharedResource sharedResource;
public int fenceCount = 0; public int fenceCount = 0;
public int activeTransitionCount = 0; public int activeTransitionCount = 0;
boolean testWithProtoBufRPC = false;
static ArrayList<DummyHAService> instances = Lists.newArrayList(); static ArrayList<DummyHAService> instances = Lists.newArrayList();
int index; int index;
DummyHAService(HAServiceState state, InetSocketAddress address) { DummyHAService(HAServiceState state, InetSocketAddress address) {
this(state, address, false);
}
DummyHAService(HAServiceState state, InetSocketAddress address,
boolean testWithProtoBufRPC) {
this.state = state; this.state = state;
this.proxy = makeMock(); this.testWithProtoBufRPC = testWithProtoBufRPC;
if (testWithProtoBufRPC) {
this.address = startAndGetRPCServerAddress(address);
} else {
this.address = address;
}
Configuration conf = new Configuration();
this.proxy = makeMock(conf, HA_HM_RPC_TIMEOUT_DEFAULT);
try { try {
Configuration conf = new Configuration(); conf.set(DUMMY_FENCE_KEY, DummyFencer.class.getName());
conf.set(DUMMY_FENCE_KEY, DummyFencer.class.getName());
this.fencer = Mockito.spy( this.fencer = Mockito.spy(
NodeFencer.create(conf, DUMMY_FENCE_KEY)); NodeFencer.create(conf, DUMMY_FENCE_KEY));
} catch (BadFencingConfigurationException e) { } catch (BadFencingConfigurationException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
this.address = address;
synchronized (instances) { synchronized (instances) {
instances.add(this); instances.add(this);
this.index = instances.size(); this.index = instances.size();
@ -75,9 +96,42 @@ class DummyHAService extends HAServiceTarget {
public void setSharedResource(DummySharedResource rsrc) { public void setSharedResource(DummySharedResource rsrc) {
this.sharedResource = rsrc; this.sharedResource = rsrc;
} }
private HAServiceProtocol makeMock() { private InetSocketAddress startAndGetRPCServerAddress(InetSocketAddress serverAddress) {
return Mockito.spy(new MockHAProtocolImpl()); Configuration conf = new Configuration();
try {
RPC.setProtocolEngine(conf,
HAServiceProtocolPB.class, ProtobufRpcEngine.class);
HAServiceProtocolServerSideTranslatorPB haServiceProtocolXlator =
new HAServiceProtocolServerSideTranslatorPB(new MockHAProtocolImpl());
BlockingService haPbService = HAServiceProtocolService
.newReflectiveBlockingService(haServiceProtocolXlator);
Server server = new RPC.Builder(conf)
.setProtocol(HAServiceProtocolPB.class)
.setInstance(haPbService)
.setBindAddress(serverAddress.getHostName())
.setPort(serverAddress.getPort()).build();
server.start();
return NetUtils.getConnectAddress(server);
} catch (IOException e) {
return null;
}
}
private HAServiceProtocol makeMock(Configuration conf, int timeoutMs) {
HAServiceProtocol service;
if (!testWithProtoBufRPC) {
service = new MockHAProtocolImpl();
} else {
try {
service = super.getProxy(conf, timeoutMs);
} catch (IOException e) {
return null;
}
}
return Mockito.spy(service);
} }
@Override @Override
@ -93,6 +147,9 @@ class DummyHAService extends HAServiceTarget {
@Override @Override
public HAServiceProtocol getProxy(Configuration conf, int timeout) public HAServiceProtocol getProxy(Configuration conf, int timeout)
throws IOException { throws IOException {
if (testWithProtoBufRPC) {
proxy = makeMock(conf, timeout);
}
return proxy; return proxy;
} }
@ -168,7 +225,7 @@ class DummyHAService extends HAServiceTarget {
public HAServiceStatus getServiceStatus() throws IOException { public HAServiceStatus getServiceStatus() throws IOException {
checkUnreachable(); checkUnreachable();
HAServiceStatus ret = new HAServiceStatus(state); HAServiceStatus ret = new HAServiceStatus(state);
if (state == HAServiceState.STANDBY) { if (state == HAServiceState.STANDBY || state == HAServiceState.ACTIVE) {
ret.setReadyToBecomeActive(); ret.setReadyToBecomeActive();
} }
return ret; return ret;

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.ha;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.io.IOException; import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -54,7 +55,8 @@ public class TestHealthMonitor {
conf.setInt(CommonConfigurationKeys.HA_HM_CONNECT_RETRY_INTERVAL_KEY, 50); conf.setInt(CommonConfigurationKeys.HA_HM_CONNECT_RETRY_INTERVAL_KEY, 50);
conf.setInt(CommonConfigurationKeys.HA_HM_SLEEP_AFTER_DISCONNECT_KEY, 50); conf.setInt(CommonConfigurationKeys.HA_HM_SLEEP_AFTER_DISCONNECT_KEY, 50);
svc = new DummyHAService(HAServiceState.ACTIVE, null); svc = new DummyHAService(HAServiceState.ACTIVE,
new InetSocketAddress("0.0.0.0", 0), true);
hm = new HealthMonitor(conf, svc) { hm = new HealthMonitor(conf, svc) {
@Override @Override
protected HAServiceProtocol createProxy() throws IOException { protected HAServiceProtocol createProxy() throws IOException {