HADOOP-7938. HA: the FailoverController should optionally fence the active during failover. Contributed by Eli Collins

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238058 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-01-30 22:42:46 +00:00
parent 5c156519df
commit 6884348444
9 changed files with 383 additions and 79 deletions

View File

@ -36,3 +36,6 @@ HADOOP-7992. Add ZKClient library to facilitate leader election.
(Bikas Saha via suresh).
HADOOP-7983. HA: failover should be able to pass args to fencers. (eli)
HADOOP-7938. HA: the FailoverController should optionally fence the
active during failover. (eli)

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.ha; package org.apache.hadoop.ha;
import java.io.IOException; import java.io.IOException;
import java.net.InetSocketAddress;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -26,6 +27,8 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import com.google.common.base.Preconditions;
/** /**
* The FailOverController is responsible for electing an active service * The FailOverController is responsible for electing an active service
* on startup or when the current active is changing (eg due to failure), * on startup or when the current active is changing (eg due to failure),
@ -48,13 +51,13 @@ public class FailoverController {
* @throws FailoverFailedException if we should avoid failover * @throws FailoverFailedException if we should avoid failover
*/ */
private static void preFailoverChecks(HAServiceProtocol toSvc, private static void preFailoverChecks(HAServiceProtocol toSvc,
String toSvcName) InetSocketAddress toSvcAddr)
throws FailoverFailedException { throws FailoverFailedException {
HAServiceState toSvcState; HAServiceState toSvcState;
try { try {
toSvcState = toSvc.getServiceState(); toSvcState = toSvc.getServiceState();
} catch (IOException e) { } catch (IOException e) {
String msg = "Unable to get service state for " + toSvcName; String msg = "Unable to get service state for " + toSvcAddr;
LOG.error(msg, e); LOG.error(msg, e);
throw new FailoverFailedException(msg, e); throw new FailoverFailedException(msg, e);
} }
@ -69,7 +72,7 @@ public class FailoverController {
"Can't failover to an unhealthy service", hce); "Can't failover to an unhealthy service", hce);
} catch (IOException e) { } catch (IOException e) {
throw new FailoverFailedException( throw new FailoverFailedException(
"Got an io exception", e); "Got an IO exception", e);
} }
// TODO(HA): ask toSvc if it's capable. Eg not in SM. // TODO(HA): ask toSvc if it's capable. Eg not in SM.
} }
@ -79,26 +82,42 @@ public class FailoverController {
* then try to failback. * then try to failback.
* *
* @param fromSvc currently active service * @param fromSvc currently active service
* @param fromSvcName name of currently active service * @param fromSvcAddr addr of the currently active service
* @param toSvc service to make active * @param toSvc service to make active
* @param toSvcName name of service to make active * @param toSvcAddr addr of the service to make active
* @param fencer for fencing fromSvc
* @param forceFence to fence fromSvc even if not strictly necessary
* @throws FailoverFailedException if the failover fails * @throws FailoverFailedException if the failover fails
*/ */
public static void failover(HAServiceProtocol fromSvc, String fromSvcName, public static void failover(HAServiceProtocol fromSvc,
HAServiceProtocol toSvc, String toSvcName) InetSocketAddress fromSvcAddr,
HAServiceProtocol toSvc,
InetSocketAddress toSvcAddr,
NodeFencer fencer, boolean forceFence)
throws FailoverFailedException { throws FailoverFailedException {
preFailoverChecks(toSvc, toSvcName); Preconditions.checkArgument(fencer != null, "failover requires a fencer");
preFailoverChecks(toSvc, toSvcAddr);
// Try to make fromSvc standby // Try to make fromSvc standby
boolean tryFence = true;
try { try {
HAServiceProtocolHelper.transitionToStandby(fromSvc); HAServiceProtocolHelper.transitionToStandby(fromSvc);
// We should try to fence if we failed or it was forced
tryFence = forceFence ? true : false;
} catch (ServiceFailedException sfe) { } catch (ServiceFailedException sfe) {
LOG.warn("Unable to make " + fromSvcName + " standby (" + LOG.warn("Unable to make " + fromSvcAddr + " standby (" +
sfe.getMessage() + ")"); sfe.getMessage() + ")");
} catch (Exception e) { } catch (IOException ioe) {
LOG.warn("Unable to make " + fromSvcName + LOG.warn("Unable to make " + fromSvcAddr +
" standby (unable to connect)", e); " standby (unable to connect)", ioe);
// TODO(HA): fence fromSvc and unfence on failback }
// Fence fromSvc if it's required or forced by the user
if (tryFence) {
if (!fencer.fence(fromSvcAddr)) {
throw new FailoverFailedException("Unable to fence " +
fromSvcAddr + ". Fencing failed.");
}
} }
// Try to make toSvc active // Try to make toSvc active
@ -107,30 +126,32 @@ public class FailoverController {
try { try {
HAServiceProtocolHelper.transitionToActive(toSvc); HAServiceProtocolHelper.transitionToActive(toSvc);
} catch (ServiceFailedException sfe) { } catch (ServiceFailedException sfe) {
LOG.error("Unable to make " + toSvcName + " active (" + LOG.error("Unable to make " + toSvcAddr + " active (" +
sfe.getMessage() + "). Failing back"); sfe.getMessage() + "). Failing back.");
failed = true; failed = true;
cause = sfe; cause = sfe;
} catch (Exception e) { } catch (IOException ioe) {
LOG.error("Unable to make " + toSvcName + LOG.error("Unable to make " + toSvcAddr +
" active (unable to connect). Failing back", e); " active (unable to connect). Failing back.", ioe);
failed = true; failed = true;
cause = e; cause = ioe;
} }
// Try to failback if we failed to make toSvc active // We failed to make toSvc active
if (failed) { if (failed) {
String msg = "Unable to failover to " + toSvcName; String msg = "Unable to failover to " + toSvcAddr;
// Only try to failback if we didn't fence fromSvc
if (!tryFence) {
try { try {
HAServiceProtocolHelper.transitionToActive(fromSvc); // Unconditionally fence toSvc in case it is still trying to
} catch (ServiceFailedException sfe) { // become active, eg we timed out waiting for its response.
msg = "Failback to " + fromSvcName + " failed (" + failover(toSvc, toSvcAddr, fromSvc, fromSvcAddr, fencer, true);
sfe.getMessage() + ")"; } catch (FailoverFailedException ffe) {
LOG.fatal(msg); msg += ". Failback to " + fromSvcAddr +
} catch (Exception e) { " failed (" + ffe.getMessage() + ")";
msg = "Failback to " + fromSvcName + " failed (unable to connect)";
LOG.fatal(msg); LOG.fatal(msg);
} }
}
throw new FailoverFailedException(msg, cause); throw new FailoverFailedException(msg, cause);
} }
} }

View File

@ -22,6 +22,13 @@ import java.io.PrintStream;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.util.Map; import java.util.Map;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configured; import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC;
@ -37,8 +44,11 @@ import com.google.common.collect.ImmutableMap;
* mode, or to trigger a health-check. * mode, or to trigger a health-check.
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
public abstract class HAAdmin extends Configured implements Tool { public abstract class HAAdmin extends Configured implements Tool {
private static final String FORCEFENCE = "forcefence";
private static Map<String, UsageInfo> USAGE = private static Map<String, UsageInfo> USAGE =
ImmutableMap.<String, UsageInfo>builder() ImmutableMap.<String, UsageInfo>builder()
.put("-transitionToActive", .put("-transitionToActive",
@ -46,8 +56,9 @@ public abstract class HAAdmin extends Configured implements Tool {
.put("-transitionToStandby", .put("-transitionToStandby",
new UsageInfo("<host:port>", "Transitions the daemon into Standby state")) new UsageInfo("<host:port>", "Transitions the daemon into Standby state"))
.put("-failover", .put("-failover",
new UsageInfo("<host:port> <host:port>", new UsageInfo("[--"+FORCEFENCE+"] <host:port> <host:port>",
"Failover from the first daemon to the second")) "Failover from the first daemon to the second.\n" +
"Unconditionally fence services if the "+FORCEFENCE+" option is used."))
.put("-getServiceState", .put("-getServiceState",
new UsageInfo("<host:port>", "Returns the state of the daemon")) new UsageInfo("<host:port>", "Returns the state of the daemon"))
.put("-checkHealth", .put("-checkHealth",
@ -111,20 +122,61 @@ public abstract class HAAdmin extends Configured implements Tool {
private int failover(final String[] argv) private int failover(final String[] argv)
throws IOException, ServiceFailedException { throws IOException, ServiceFailedException {
if (argv.length != 3) { Configuration conf = getConf();
errOut.println("failover: incorrect number of arguments"); boolean forceFence = false;
Options failoverOpts = new Options();
// "-failover" isn't really an option but we need to add
// it to appease CommandLineParser
failoverOpts.addOption("failover", false, "failover");
failoverOpts.addOption(FORCEFENCE, false, "force fencing");
CommandLineParser parser = new GnuParser();
CommandLine cmd;
try {
cmd = parser.parse(failoverOpts, argv);
forceFence = cmd.hasOption(FORCEFENCE);
} catch (ParseException pe) {
errOut.println("failover: incorrect arguments");
printUsage(errOut, "-failover"); printUsage(errOut, "-failover");
return -1; return -1;
} }
HAServiceProtocol proto1 = getProtocol(argv[1]); int numOpts = cmd.getOptions() == null ? 0 : cmd.getOptions().length;
HAServiceProtocol proto2 = getProtocol(argv[2]); final String[] args = cmd.getArgs();
if (numOpts > 2 || args.length != 2) {
errOut.println("failover: incorrect arguments");
printUsage(errOut, "-failover");
return -1;
}
NodeFencer fencer;
try { try {
FailoverController.failover(proto1, argv[1], proto2, argv[2]); fencer = NodeFencer.create(conf);
out.println("Failover from "+argv[1]+" to "+argv[2]+" successful"); } catch (BadFencingConfigurationException bfce) {
errOut.println("failover: incorrect fencing configuration: " +
bfce.getLocalizedMessage());
return -1;
}
if (fencer == null) {
errOut.println("failover: no fencer configured");
return -1;
}
InetSocketAddress addr1 = NetUtils.createSocketAddr(args[0]);
InetSocketAddress addr2 = NetUtils.createSocketAddr(args[1]);
HAServiceProtocol proto1 = getProtocol(args[0]);
HAServiceProtocol proto2 = getProtocol(args[1]);
try {
FailoverController.failover(proto1, addr1, proto2, addr2,
fencer, forceFence);
out.println("Failover from "+args[0]+" to "+args[1]+" successful");
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
errOut.println("Failover failed: " + ffe.getLocalizedMessage()); errOut.println("Failover failed: " + ffe.getLocalizedMessage());
return 1; return -1;
} }
return 0; return 0;
} }
@ -142,7 +194,7 @@ public abstract class HAAdmin extends Configured implements Tool {
HAServiceProtocolHelper.monitorHealth(proto); HAServiceProtocolHelper.monitorHealth(proto);
} catch (HealthCheckFailedException e) { } catch (HealthCheckFailedException e) {
errOut.println("Health check failed: " + e.getLocalizedMessage()); errOut.println("Health check failed: " + e.getLocalizedMessage());
return 1; return -1;
} }
return 0; return 0;
} }
@ -223,7 +275,7 @@ public abstract class HAAdmin extends Configured implements Tool {
} }
errOut.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help); errOut.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help);
return 1; return 0;
} }
private static class UsageInfo { private static class UsageInfo {

View File

@ -19,18 +19,13 @@ package org.apache.hadoop.ha;
import java.io.IOException; import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
/** /**
* Helper for making {@link HAServiceProtocol} RPC calls. This helper * Helper for making {@link HAServiceProtocol} RPC calls. This helper
* unwraps the {@link RemoteException} to specific exceptions. * unwraps the {@link RemoteException} to specific exceptions.
*
*/ */
@InterfaceAudience.Public @InterfaceAudience.Public
@InterfaceStability.Evolving @InterfaceStability.Evolving

View File

@ -82,6 +82,15 @@ public class NodeFencer {
this.methods = parseMethods(conf); this.methods = parseMethods(conf);
} }
/**
 * Factory for a {@link NodeFencer} backed by the given configuration.
 *
 * @param conf configuration to look up {@code CONF_METHODS_KEY} in
 * @return {@code null} when {@code CONF_METHODS_KEY} has no value in
 *         {@code conf}; otherwise a new fencer constructed from {@code conf}
 * @throws BadFencingConfigurationException propagated from the
 *         {@code NodeFencer} constructor (presumably when the configured
 *         methods cannot be parsed -- confirm against parseMethods)
 */
public static NodeFencer create(Configuration conf)
    throws BadFencingConfigurationException {
  // No fencing methods configured: callers must handle the null result.
  if (conf.get(CONF_METHODS_KEY) == null) {
    return null;
  }
  return new NodeFencer(conf);
}
public boolean fence(InetSocketAddress serviceAddr) { public boolean fence(InetSocketAddress serviceAddr) {
LOG.info("====== Beginning NameNode Fencing Process... ======"); LOG.info("====== Beginning NameNode Fencing Process... ======");
int i = 0; int i = 0;
@ -130,7 +139,6 @@ public class NodeFencer {
if ((m = CLASS_WITH_ARGUMENT.matcher(line)).matches()) { if ((m = CLASS_WITH_ARGUMENT.matcher(line)).matches()) {
String className = m.group(1); String className = m.group(1);
String arg = m.group(2); String arg = m.group(2);
return createFenceMethod(conf, className, arg); return createFenceMethod(conf, className, arg);
} else if ((m = CLASS_WITHOUT_ARGUMENT.matcher(line)).matches()) { } else if ((m = CLASS_WITHOUT_ARGUMENT.matcher(line)).matches()) {
String className = m.group(1); String className = m.group(1);

View File

@ -26,6 +26,9 @@ import static org.mockito.Mockito.verify;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.TestNodeFencer.AlwaysSucceedFencer;
import org.apache.hadoop.ha.TestNodeFencer.AlwaysFailFencer;
import static org.apache.hadoop.ha.TestNodeFencer.setupFencer;
import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
@ -35,6 +38,9 @@ import static org.junit.Assert.*;
public class TestFailoverController { public class TestFailoverController {
private InetSocketAddress svc1Addr = new InetSocketAddress("svc1", 1234);
private InetSocketAddress svc2Addr = new InetSocketAddress("svc2", 5678);
private class DummyService implements HAServiceProtocol { private class DummyService implements HAServiceProtocol {
HAServiceState state; HAServiceState state;
@ -55,22 +61,22 @@ public class TestFailoverController {
} }
@Override @Override
public void monitorHealth() throws HealthCheckFailedException { public void monitorHealth() throws HealthCheckFailedException, IOException {
// Do nothing // Do nothing
} }
@Override @Override
public void transitionToActive() throws ServiceFailedException { public void transitionToActive() throws ServiceFailedException, IOException {
state = HAServiceState.ACTIVE; state = HAServiceState.ACTIVE;
} }
@Override @Override
public void transitionToStandby() throws ServiceFailedException { public void transitionToStandby() throws ServiceFailedException, IOException {
state = HAServiceState.STANDBY; state = HAServiceState.STANDBY;
} }
@Override @Override
public HAServiceState getServiceState() { public HAServiceState getServiceState() throws IOException {
return state; return state;
} }
} }
@ -79,12 +85,17 @@ public class TestFailoverController {
public void testFailoverAndFailback() throws Exception { public void testFailoverAndFailback() throws Exception {
DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
DummyService svc2 = new DummyService(HAServiceState.STANDBY); DummyService svc2 = new DummyService(HAServiceState.STANDBY);
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
FailoverController.failover(svc1, "svc1", svc2, "svc2"); AlwaysSucceedFencer.fenceCalled = 0;
FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled);
assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); assertEquals(HAServiceState.STANDBY, svc1.getServiceState());
assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
FailoverController.failover(svc2, "svc2", svc1, "svc1"); AlwaysSucceedFencer.fenceCalled = 0;
FailoverController.failover(svc2, svc2Addr, svc1, svc1Addr, fencer, false);
assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled);
assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc1.getServiceState());
assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); assertEquals(HAServiceState.STANDBY, svc2.getServiceState());
} }
@ -93,8 +104,9 @@ public class TestFailoverController {
public void testFailoverFromStandbyToStandby() throws Exception { public void testFailoverFromStandbyToStandby() throws Exception {
DummyService svc1 = new DummyService(HAServiceState.STANDBY); DummyService svc1 = new DummyService(HAServiceState.STANDBY);
DummyService svc2 = new DummyService(HAServiceState.STANDBY); DummyService svc2 = new DummyService(HAServiceState.STANDBY);
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); assertEquals(HAServiceState.STANDBY, svc1.getServiceState());
assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
} }
@ -103,9 +115,10 @@ public class TestFailoverController {
public void testFailoverFromActiveToActive() throws Exception { public void testFailoverFromActiveToActive() throws Exception {
DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
DummyService svc2 = new DummyService(HAServiceState.ACTIVE); DummyService svc2 = new DummyService(HAServiceState.ACTIVE);
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
fail("Can't failover to an already active service"); fail("Can't failover to an already active service");
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
// Expected // Expected
@ -116,7 +129,7 @@ public class TestFailoverController {
} }
@Test @Test
public void testFailoverToUnhealthyServiceFails() throws Exception { public void testFailoverToUnhealthyServiceFailsAndFailsback() throws Exception {
DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
DummyService svc2 = new DummyService(HAServiceState.STANDBY) { DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
@Override @Override
@ -124,9 +137,10 @@ public class TestFailoverController {
throw new HealthCheckFailedException("Failed!"); throw new HealthCheckFailedException("Failed!");
} }
}; };
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
fail("Failover to unhealthy service"); fail("Failover to unhealthy service");
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
// Expected // Expected
@ -144,17 +158,69 @@ public class TestFailoverController {
} }
}; };
DummyService svc2 = new DummyService(HAServiceState.STANDBY); DummyService svc2 = new DummyService(HAServiceState.STANDBY);
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
AlwaysSucceedFencer.fenceCalled = 0;
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
fail("Faulty active prevented failover"); fail("Faulty active prevented failover");
} }
// svc1 still thinks they're active, that's OK, we'll fence them
// svc1 still thinks it's active, that's OK, it was fenced
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
assertEquals("svc1:1234", AlwaysSucceedFencer.fencedSvc);
assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc1.getServiceState());
assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
} }
/**
 * The active service refuses to become standby and the configured fencer
 * is AlwaysFailFencer: the failover must be rejected after exactly one
 * fencing attempt against svc1, leaving both service states unchanged.
 */
@Test
public void testFailoverFromFaultyServiceFencingFailure() throws Exception {
  // Failover target, starts out as standby.
  DummyService standby = new DummyService(HAServiceState.STANDBY);
  // Current active whose transitionToStandby always throws.
  DummyService faultyActive = new DummyService(HAServiceState.ACTIVE) {
    @Override
    public void transitionToStandby() throws ServiceFailedException {
      throw new ServiceFailedException("Failed!");
    }
  };
  NodeFencer failingFencer = setupFencer(AlwaysFailFencer.class.getName());
  // Reset the shared counter so the assertion below only sees this test.
  AlwaysFailFencer.fenceCalled = 0;

  boolean failoverRejected = false;
  try {
    FailoverController.failover(faultyActive, svc1Addr, standby, svc2Addr,
        failingFencer, false);
  } catch (FailoverFailedException expected) {
    failoverRejected = true;
  }
  if (!failoverRejected) {
    fail("Failed over even though fencing failed");
  }

  // One fencing attempt, aimed at the service we were failing over from.
  assertEquals(1, AlwaysFailFencer.fenceCalled);
  assertEquals("svc1:1234", AlwaysFailFencer.fencedSvc);
  // Neither service changed state.
  assertEquals(HAServiceState.ACTIVE, faultyActive.getServiceState());
  assertEquals(HAServiceState.STANDBY, standby.getServiceState());
}
/**
 * Fencing is forced and the configured fencer is AlwaysFailFencer: the
 * failover must be rejected, the target must not be made active, and
 * there is no failback to the original active.
 */
@Test
public void testFencingFailureDuringFailover() throws Exception {
  DummyService target = new DummyService(HAServiceState.STANDBY);
  DummyService origin = new DummyService(HAServiceState.ACTIVE);
  NodeFencer failingFencer = setupFencer(AlwaysFailFencer.class.getName());
  // Reset the shared counter so the assertion below only sees this test.
  AlwaysFailFencer.fenceCalled = 0;

  boolean failoverRejected = false;
  try {
    // forceFence == true: fencing is attempted even though the origin
    // transitions to standby without complaint.
    FailoverController.failover(origin, svc1Addr, target, svc2Addr,
        failingFencer, true);
  } catch (FailoverFailedException expected) {
    failoverRejected = true;
  }
  if (!failoverRejected) {
    fail("Failed over even though fencing requested and failed");
  }

  // Fencing was requested and failed, so the target was never made
  // active and the origin was not failed back to: both end up standby.
  assertEquals(1, AlwaysFailFencer.fenceCalled);
  assertEquals("svc1:1234", AlwaysFailFencer.fencedSvc);
  assertEquals(HAServiceState.STANDBY, origin.getServiceState());
  assertEquals(HAServiceState.STANDBY, target.getServiceState());
}
private HAServiceProtocol getProtocol(String target) private HAServiceProtocol getProtocol(String target)
throws IOException { throws IOException {
InetSocketAddress addr = NetUtils.createSocketAddr(target); InetSocketAddress addr = NetUtils.createSocketAddr(target);
@ -166,17 +232,18 @@ public class TestFailoverController {
} }
@Test @Test
public void testFailoverFromNonExistantServiceSucceeds() throws Exception { public void testFailoverFromNonExistantServiceWithFencer() throws Exception {
HAServiceProtocol svc1 = getProtocol("localhost:1234"); HAServiceProtocol svc1 = getProtocol("localhost:1234");
DummyService svc2 = new DummyService(HAServiceState.STANDBY); DummyService svc2 = new DummyService(HAServiceState.STANDBY);
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
fail("Non-existant active prevented failover"); fail("Non-existant active prevented failover");
} }
// Don't check svc1 (we can't reach it, but that's OK, we'll fence) // Don't check svc1 because we can't reach it, but that's OK, it's been fenced.
assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
} }
@ -184,9 +251,10 @@ public class TestFailoverController {
public void testFailoverToNonExistantServiceFails() throws Exception { public void testFailoverToNonExistantServiceFails() throws Exception {
DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
HAServiceProtocol svc2 = getProtocol("localhost:1234"); HAServiceProtocol svc2 = getProtocol("localhost:1234");
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
fail("Failed over to a non-existant standby"); fail("Failed over to a non-existant standby");
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
// Expected // Expected
@ -204,9 +272,10 @@ public class TestFailoverController {
throw new ServiceFailedException("Failed!"); throw new ServiceFailedException("Failed!");
} }
}; };
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
fail("Failover to already active service"); fail("Failover to already active service");
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
// Expected // Expected
@ -219,6 +288,84 @@ public class TestFailoverController {
assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); assertEquals(HAServiceState.STANDBY, svc2.getServiceState());
} }
/**
 * With forced fencing, a target that cannot become active must fail the
 * failover without any failback to the original active.
 */
@Test
public void testWeDontFailbackIfActiveWasFenced() throws Exception {
  // Target whose transitionToActive always throws.
  DummyService stubbornStandby = new DummyService(HAServiceState.STANDBY) {
    @Override
    public void transitionToActive() throws ServiceFailedException {
      throw new ServiceFailedException("Failed!");
    }
  };
  DummyService origin = new DummyService(HAServiceState.ACTIVE);
  NodeFencer succeedingFencer =
      setupFencer(AlwaysSucceedFencer.class.getName());

  boolean failoverRejected = false;
  try {
    FailoverController.failover(origin, svc1Addr, stubbornStandby, svc2Addr,
        succeedingFencer, true);
  } catch (FailoverFailedException expected) {
    failoverRejected = true;
  }
  if (!failoverRejected) {
    fail("Failed over to service that won't transition to active");
  }

  // The origin was fenced (forced), so no failback was attempted: the
  // origin stays standby and the target never became active.
  assertEquals(HAServiceState.STANDBY, origin.getServiceState());
  assertEquals(HAServiceState.STANDBY, stubbornStandby.getServiceState());
}
/**
 * The target throws an IOException from transitionToActive and fencing
 * is not forced: the failover fails, the origin is made active again,
 * and the target (svc2) is fenced exactly once.
 */
@Test
public void testWeFenceOnFailbackIfTransitionToActiveFails() throws Exception {
  // Target whose transitionToActive always throws an IOException.
  DummyService unreachableStandby = new DummyService(HAServiceState.STANDBY) {
    @Override
    public void transitionToActive() throws ServiceFailedException, IOException {
      throw new IOException("Failed!");
    }
  };
  DummyService origin = new DummyService(HAServiceState.ACTIVE);
  NodeFencer succeedingFencer =
      setupFencer(AlwaysSucceedFencer.class.getName());
  // Reset the shared counter so the assertion below only sees this test.
  AlwaysSucceedFencer.fenceCalled = 0;

  boolean failoverRejected = false;
  try {
    FailoverController.failover(origin, svc1Addr, unreachableStandby,
        svc2Addr, succeedingFencer, false);
  } catch (FailoverFailedException expected) {
    failoverRejected = true;
  }
  if (!failoverRejected) {
    fail("Failed over to service that won't transition to active");
  }

  // The origin cooperated and fencing was not forced, so it was not
  // fenced; the failback made it active again and fenced the target.
  // The target's own state is deliberately not checked.
  assertEquals(HAServiceState.ACTIVE, origin.getServiceState());
  assertEquals(1, AlwaysSucceedFencer.fenceCalled);
  assertEquals("svc2:5678", AlwaysSucceedFencer.fencedSvc);
}
/**
 * The target throws an IOException from transitionToActive and the
 * fencer is AlwaysFailFencer: the failback's fencing of svc2 fails, so
 * the origin is not made active again and remains standby.
 */
@Test
public void testFailureToFenceOnFailbackFailsTheFailback() throws Exception {
  // Target whose transitionToActive always throws an IOException.
  DummyService unreachableStandby = new DummyService(HAServiceState.STANDBY) {
    @Override
    public void transitionToActive() throws ServiceFailedException, IOException {
      throw new IOException("Failed!");
    }
  };
  DummyService origin = new DummyService(HAServiceState.ACTIVE);
  NodeFencer failingFencer = setupFencer(AlwaysFailFencer.class.getName());
  // Reset the shared counter so the assertion below only sees this test.
  AlwaysFailFencer.fenceCalled = 0;

  boolean failoverRejected = false;
  try {
    FailoverController.failover(origin, svc1Addr, unreachableStandby,
        svc2Addr, failingFencer, false);
  } catch (FailoverFailedException expected) {
    failoverRejected = true;
  }
  if (!failoverRejected) {
    fail("Failed over to service that won't transition to active");
  }

  // The origin cooperated and was not fenced. The failed failover then
  // tried to fence the target; that fencing failed, so the origin was
  // never failed back to and is still standby.
  assertEquals(HAServiceState.STANDBY, origin.getServiceState());
  assertEquals(1, AlwaysFailFencer.fenceCalled);
  assertEquals("svc2:5678", AlwaysFailFencer.fencedSvc);
}
@Test @Test
public void testFailbackToFaultyServiceFails() throws Exception { public void testFailbackToFaultyServiceFails() throws Exception {
DummyService svc1 = new DummyService(HAServiceState.ACTIVE) { DummyService svc1 = new DummyService(HAServiceState.ACTIVE) {
@ -233,9 +380,10 @@ public class TestFailoverController {
throw new ServiceFailedException("Failed!"); throw new ServiceFailedException("Failed!");
} }
}; };
NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
try { try {
FailoverController.failover(svc1, "svc1", svc2, "svc2"); FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
fail("Failover to already active service"); fail("Failover to already active service");
} catch (FailoverFailedException ffe) { } catch (FailoverFailedException ffe) {
// Expected // Expected

View File

@ -80,47 +80,103 @@ public class TestHAAdmin {
assertOutputContains("transitionToActive: incorrect number of arguments"); assertOutputContains("transitionToActive: incorrect number of arguments");
assertEquals(-1, runTool("-transitionToActive", "x", "y")); assertEquals(-1, runTool("-transitionToActive", "x", "y"));
assertOutputContains("transitionToActive: incorrect number of arguments"); assertOutputContains("transitionToActive: incorrect number of arguments");
assertEquals(-1, runTool("-failover"));
assertOutputContains("failover: incorrect arguments");
assertOutputContains("failover: incorrect arguments");
assertEquals(-1, runTool("-failover", "foo:1234"));
assertOutputContains("failover: incorrect arguments");
} }
@Test @Test
public void testHelp() throws Exception { public void testHelp() throws Exception {
assertEquals(-1, runTool("-help")); assertEquals(-1, runTool("-help"));
assertEquals(1, runTool("-help", "transitionToActive")); assertEquals(0, runTool("-help", "transitionToActive"));
assertOutputContains("Transitions the daemon into Active"); assertOutputContains("Transitions the daemon into Active");
} }
@Test @Test
public void testTransitionToActive() throws Exception { public void testTransitionToActive() throws Exception {
assertEquals(0, runTool("-transitionToActive", "xxx")); assertEquals(0, runTool("-transitionToActive", "foo:1234"));
Mockito.verify(mockProtocol).transitionToActive(); Mockito.verify(mockProtocol).transitionToActive();
} }
@Test @Test
public void testTransitionToStandby() throws Exception { public void testTransitionToStandby() throws Exception {
assertEquals(0, runTool("-transitionToStandby", "xxx")); assertEquals(0, runTool("-transitionToStandby", "foo:1234"));
Mockito.verify(mockProtocol).transitionToStandby(); Mockito.verify(mockProtocol).transitionToStandby();
} }
@Test @Test
public void testFailover() throws Exception { public void testFailoverWithNoFencerConfigured() throws Exception {
Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
assertEquals(0, runTool("-failover", "xxx", "yyy")); assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678"));
}
/**
 * Runs "-failover" between two addresses after setting the fencing
 * methods key to "shell(true)" and expects the tool to exit with 0.
 */
@Test
public void testFailoverWithFencerConfigured() throws Exception {
  // The mocked service reports STANDBY whenever its state is queried.
  Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();

  Configuration fencerConf = new Configuration();
  fencerConf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
  tool.setConf(fencerConf);

  int exitCode = runTool("-failover", "foo:1234", "bar:5678");
  assertEquals(0, exitCode);
}
/**
 * Same setup as the plain fencer-configured case, but with "--forcefence"
 * appended after the two addresses; the tool is still expected to exit
 * with 0.
 */
@Test
public void testFailoverWithFencerConfiguredAndForce() throws Exception {
  // The mocked service reports STANDBY whenever its state is queried.
  Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();

  Configuration fencerConf = new Configuration();
  fencerConf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
  tool.setConf(fencerConf);

  int exitCode = runTool("-failover", "foo:1234", "bar:5678", "--forcefence");
  assertEquals(0, exitCode);
}
/**
 * Passes "notforcefence" as the trailing argument of "-failover" and
 * expects the tool to exit with -1, even though the fencing methods key
 * is set to "shell(true)".
 */
@Test
public void testFailoverWithInvalidFenceArg() throws Exception {
  // The mocked service reports STANDBY whenever its state is queried.
  Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();

  Configuration fencerConf = new Configuration();
  fencerConf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
  tool.setConf(fencerConf);

  int exitCode = runTool("-failover", "foo:1234", "bar:5678", "notforcefence");
  assertEquals(-1, exitCode);
}
/**
 * Requests "--forcefence" without this test setting any fencing
 * configuration on the tool, and expects the tool to exit with -1.
 */
@Test
public void testFailoverWithFenceButNoFencer() throws Exception {
  // The mocked service reports STANDBY whenever its state is queried.
  Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();

  int exitCode = runTool("-failover", "foo:1234", "bar:5678", "--forcefence");
  assertEquals(-1, exitCode);
}
/**
 * Sets the fencing methods key to "foobar!" and requests "--forcefence";
 * the tool is expected to exit with -1.
 */
@Test
public void testFailoverWithFenceAndBadFencer() throws Exception {
  // The mocked service reports STANDBY whenever its state is queried.
  Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();

  Configuration badFencerConf = new Configuration();
  badFencerConf.set(NodeFencer.CONF_METHODS_KEY, "foobar!");
  tool.setConf(badFencerConf);

  int exitCode = runTool("-failover", "foo:1234", "bar:5678", "--forcefence");
  assertEquals(-1, exitCode);
}
@Test
public void testForceFenceOptionListedBeforeArgs() throws Exception {
Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
Configuration conf = new Configuration();
conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
tool.setConf(conf);
assertEquals(0, runTool("-failover", "--forcefence", "foo:1234", "bar:5678"));
} }
@Test @Test
public void testGetServiceState() throws Exception { public void testGetServiceState() throws Exception {
assertEquals(0, runTool("-getServiceState", "xxx")); assertEquals(0, runTool("-getServiceState", "foo:1234"));
Mockito.verify(mockProtocol).getServiceState(); Mockito.verify(mockProtocol).getServiceState();
} }
@Test @Test
public void testCheckHealth() throws Exception { public void testCheckHealth() throws Exception {
assertEquals(0, runTool("-checkHealth", "xxx")); assertEquals(0, runTool("-checkHealth", "foo:1234"));
Mockito.verify(mockProtocol).monitorHealth(); Mockito.verify(mockProtocol).monitorHealth();
Mockito.doThrow(new HealthCheckFailedException("fake health check failure")) Mockito.doThrow(new HealthCheckFailedException("fake health check failure"))
.when(mockProtocol).monitorHealth(); .when(mockProtocol).monitorHealth();
assertEquals(1, runTool("-checkHealth", "xxx")); assertEquals(-1, runTool("-checkHealth", "foo:1234"));
assertOutputContains("Health check failed: fake health check failure"); assertOutputContains("Health check failed: fake health check failure");
} }

View File

@ -119,12 +119,11 @@ public class TestNodeFencer {
assertFalse(fencer.fence(new InetSocketAddress("host", 1234))); assertFalse(fencer.fence(new InetSocketAddress("host", 1234)));
} }
private NodeFencer setupFencer(String confStr) public static NodeFencer setupFencer(String confStr)
throws BadFencingConfigurationException { throws BadFencingConfigurationException {
System.err.println("Testing configuration:\n" + confStr); System.err.println("Testing configuration:\n" + confStr);
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set(NodeFencer.CONF_METHODS_KEY, conf.set(NodeFencer.CONF_METHODS_KEY, confStr);
confStr);
return new NodeFencer(conf); return new NodeFencer(conf);
} }

View File

@ -106,6 +106,28 @@ public class TestHAStateTransitions {
} }
} }
/**
 * Verifies that asking a service to enter the state it already occupies
 * is a no-op; in particular, the repeated transition must not throw.
 */
@Test
public void testTransitionToCurrentStateIsANop() throws Exception {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(1)
      .build();
  try {
    cluster.waitActive();

    // Each transition is issued twice in a row; the second call of each
    // pair requests the state the first call already asked for.
    cluster.transitionToActive(0);
    cluster.transitionToActive(0);

    cluster.transitionToStandby(0);
    cluster.transitionToStandby(0);
  } finally {
    // Always tear the mini cluster down, even if a transition throws.
    cluster.shutdown();
  }
}
/** /**
* Test manual failover failback for one namespace * Test manual failover failback for one namespace
* @param cluster single process test cluster * @param cluster single process test cluster