HDFS-15404. ShellCommandFencer should expose info about source. Contributed by Chen Liang.
(cherry picked from commit 3833c616e0
)
This commit is contained in:
parent
0d949d375e
commit
370efc6d95
|
@ -213,7 +213,7 @@ public class FailoverController {
|
|||
|
||||
// Fence fromSvc if it's required or forced by the user
|
||||
if (tryFence) {
|
||||
if (!fromSvc.getFencer().fence(fromSvc)) {
|
||||
if (!fromSvc.getFencer().fence(fromSvc, toSvc)) {
|
||||
throw new FailoverFailedException("Unable to fence " +
|
||||
fromSvc + ". Fencing failed.");
|
||||
}
|
||||
|
|
|
@ -44,6 +44,12 @@ public abstract class HAServiceTarget {
|
|||
private static final String PORT_SUBST_KEY = "port";
|
||||
private static final String ADDRESS_SUBST_KEY = "address";
|
||||
|
||||
/**
|
||||
* The HAState this service target is intended to be after transition
|
||||
* is complete.
|
||||
*/
|
||||
private HAServiceProtocol.HAServiceState transitionTargetHAStatus;
|
||||
|
||||
/**
|
||||
* @return the IPC address of the target node.
|
||||
*/
|
||||
|
@ -93,6 +99,15 @@ public abstract class HAServiceTarget {
|
|||
return getProxyForAddress(conf, timeoutMs, getAddress());
|
||||
}
|
||||
|
||||
public void setTransitionTargetHAStatus(
|
||||
HAServiceProtocol.HAServiceState status) {
|
||||
this.transitionTargetHAStatus = status;
|
||||
}
|
||||
|
||||
public HAServiceProtocol.HAServiceState getTransitionTargetHAStatus() {
|
||||
return this.transitionTargetHAStatus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a proxy to connect to the target HA service for health monitoring.
|
||||
* If {@link #getHealthMonitorAddress()} is implemented to return a non-null
|
||||
|
|
|
@ -89,16 +89,33 @@ public class NodeFencer {
|
|||
}
|
||||
|
||||
public boolean fence(HAServiceTarget fromSvc) {
|
||||
return fence(fromSvc, null);
|
||||
}
|
||||
|
||||
public boolean fence(HAServiceTarget fromSvc, HAServiceTarget toSvc) {
|
||||
LOG.info("====== Beginning Service Fencing Process... ======");
|
||||
int i = 0;
|
||||
for (FenceMethodWithArg method : methods) {
|
||||
LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method);
|
||||
|
||||
try {
|
||||
// only true when target node is given, AND fencing on it failed
|
||||
boolean toSvcFencingFailed = false;
|
||||
// if target is given, try to fence on target first. Only if fencing
|
||||
// on target succeeded, do fencing on source node.
|
||||
if (toSvc != null) {
|
||||
toSvcFencingFailed = !method.method.tryFence(toSvc, method.arg);
|
||||
}
|
||||
if (toSvcFencingFailed) {
|
||||
LOG.error("====== Fencing on target failed, skipping fencing "
|
||||
+ "on source ======");
|
||||
} else {
|
||||
if (method.method.tryFence(fromSvc, method.arg)) {
|
||||
LOG.info("====== Fencing successful by method " + method + " ======");
|
||||
LOG.info("====== Fencing successful by method "
|
||||
+ method + " ======");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} catch (BadFencingConfigurationException e) {
|
||||
LOG.error("Fencing method " + method + " misconfigured", e);
|
||||
continue;
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.ha;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
|
@ -60,6 +61,11 @@ public class ShellCommandFencer
|
|||
/** Prefix for target parameters added to the environment */
|
||||
private static final String TARGET_PREFIX = "target_";
|
||||
|
||||
/** Prefix for source parameters added to the environment */
|
||||
private static final String SOURCE_PREFIX = "source_";
|
||||
|
||||
private static final String ARG_DELIMITER = ",";
|
||||
|
||||
@VisibleForTesting
|
||||
static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class);
|
||||
|
||||
|
@ -73,8 +79,9 @@ public class ShellCommandFencer
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean tryFence(HAServiceTarget target, String cmd) {
|
||||
public boolean tryFence(HAServiceTarget target, String args) {
|
||||
ProcessBuilder builder;
|
||||
String cmd = parseArgs(target.getTransitionTargetHAStatus(), args);
|
||||
|
||||
if (!Shell.WINDOWS) {
|
||||
builder = new ProcessBuilder("bash", "-e", "-c", cmd);
|
||||
|
@ -127,6 +134,28 @@ public class ShellCommandFencer
|
|||
return rc == 0;
|
||||
}
|
||||
|
||||
private String parseArgs(HAServiceProtocol.HAServiceState state,
|
||||
String cmd) {
|
||||
String[] args = cmd.split(ARG_DELIMITER);
|
||||
if (args.length == 1) {
|
||||
// only one command is given, assuming both src and dst
|
||||
// will execute the same command/script.
|
||||
return args[0];
|
||||
}
|
||||
if (args.length > 2) {
|
||||
throw new IllegalArgumentException("Expecting arguments size of at most "
|
||||
+ "two, getting " + Arrays.asList(args));
|
||||
}
|
||||
if (HAServiceProtocol.HAServiceState.ACTIVE.equals(state)) {
|
||||
return args[0];
|
||||
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(state)) {
|
||||
return args[1];
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"Unexpected HA service state:" + state);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Abbreviate a string by putting '...' in the middle of it,
|
||||
* in an attempt to keep logs from getting too messy.
|
||||
|
@ -190,9 +219,24 @@ public class ShellCommandFencer
|
|||
*/
|
||||
private void addTargetInfoAsEnvVars(HAServiceTarget target,
|
||||
Map<String, String> environment) {
|
||||
String prefix;
|
||||
HAServiceProtocol.HAServiceState targetState =
|
||||
target.getTransitionTargetHAStatus();
|
||||
if (targetState == null ||
|
||||
HAServiceProtocol.HAServiceState.ACTIVE.equals(targetState)) {
|
||||
// null is assumed to be same as ACTIVE, this is to be compatible
|
||||
// with existing tests/use cases where target state is not specified
|
||||
// but assuming it's active.
|
||||
prefix = TARGET_PREFIX;
|
||||
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(targetState)) {
|
||||
prefix = SOURCE_PREFIX;
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"Unexpected HA service state:" + targetState);
|
||||
}
|
||||
for (Map.Entry<String, String> e :
|
||||
target.getFencingParameters().entrySet()) {
|
||||
String key = TARGET_PREFIX + e.getKey();
|
||||
String key = prefix + e.getKey();
|
||||
key = key.replace('.', '_');
|
||||
environment.put(key, e.getValue());
|
||||
}
|
||||
|
|
|
@ -177,7 +177,7 @@ public class TestFailoverController {
|
|||
}
|
||||
|
||||
// svc1 still thinks it's active, that's OK, it was fenced
|
||||
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
|
||||
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
|
||||
assertSame(svc1, AlwaysSucceedFencer.fencedSvc);
|
||||
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
||||
assertEquals(HAServiceState.ACTIVE, svc2.state);
|
||||
|
@ -201,7 +201,7 @@ public class TestFailoverController {
|
|||
}
|
||||
|
||||
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
||||
assertSame(svc1, AlwaysFailFencer.fencedSvc);
|
||||
assertSame(svc2, AlwaysFailFencer.fencedSvc);
|
||||
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
||||
assertEquals(HAServiceState.STANDBY, svc2.state);
|
||||
}
|
||||
|
@ -223,7 +223,7 @@ public class TestFailoverController {
|
|||
// If fencing was requested and it failed we don't try to make
|
||||
// svc2 active anyway, and we don't failback to svc1.
|
||||
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
||||
assertSame(svc1, AlwaysFailFencer.fencedSvc);
|
||||
assertSame(svc2, AlwaysFailFencer.fencedSvc);
|
||||
assertEquals(HAServiceState.STANDBY, svc1.state);
|
||||
assertEquals(HAServiceState.STANDBY, svc2.state);
|
||||
}
|
||||
|
@ -344,7 +344,7 @@ public class TestFailoverController {
|
|||
// and we didn't force it, so we failed back to svc1 and fenced svc2.
|
||||
// Note svc2 still thinks it's active, that's OK, we fenced it.
|
||||
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
||||
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
|
||||
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
|
||||
assertSame(svc2, AlwaysSucceedFencer.fencedSvc);
|
||||
}
|
||||
|
||||
|
@ -373,7 +373,7 @@ public class TestFailoverController {
|
|||
// so we did not failback to svc1, ie it's still standby.
|
||||
assertEquals(HAServiceState.STANDBY, svc1.state);
|
||||
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
||||
assertSame(svc2, AlwaysFailFencer.fencedSvc);
|
||||
assertSame(svc1, AlwaysFailFencer.fencedSvc);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -163,6 +163,37 @@ public class TestShellCommandFencer {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test if fencing target has peer set, the failover can trigger different
|
||||
* commands on source and destination respectively.
|
||||
*/
|
||||
@Test
|
||||
public void testEnvironmentWithPeer() {
|
||||
HAServiceTarget target = new DummyHAService(HAServiceState.ACTIVE,
|
||||
new InetSocketAddress("dummytarget", 1111));
|
||||
HAServiceTarget source = new DummyHAService(HAServiceState.STANDBY,
|
||||
new InetSocketAddress("dummysource", 2222));
|
||||
target.setTransitionTargetHAStatus(HAServiceState.ACTIVE);
|
||||
source.setTransitionTargetHAStatus(HAServiceState.STANDBY);
|
||||
String cmd = "echo $target_host $target_port,"
|
||||
+ "echo $source_host $source_port";
|
||||
if (!Shell.WINDOWS) {
|
||||
fencer.tryFence(target, cmd);
|
||||
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||
Mockito.contains("echo $ta...rget_port: dummytarget 1111"));
|
||||
fencer.tryFence(source, cmd);
|
||||
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||
Mockito.contains("echo $so...urce_port: dummysource 2222"));
|
||||
} else {
|
||||
fencer.tryFence(target, cmd);
|
||||
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||
Mockito.contains("echo %ta...get_port%: dummytarget 1111"));
|
||||
fencer.tryFence(source, cmd);
|
||||
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||
Mockito.contains("echo %so...urce_port%: dummysource 2222"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test that we properly close off our input to the subprocess
|
||||
|
|
|
@ -284,6 +284,11 @@ public class DFSHAAdmin extends HAAdmin {
|
|||
HAServiceTarget fromNode = resolveTarget(args[0]);
|
||||
HAServiceTarget toNode = resolveTarget(args[1]);
|
||||
|
||||
fromNode.setTransitionTargetHAStatus(
|
||||
HAServiceProtocol.HAServiceState.STANDBY);
|
||||
toNode.setTransitionTargetHAStatus(
|
||||
HAServiceProtocol.HAServiceState.ACTIVE);
|
||||
|
||||
// Check that auto-failover is consistently configured for both nodes.
|
||||
Preconditions.checkState(
|
||||
fromNode.isAutoFailoverEnabled() ==
|
||||
|
|
|
@ -189,13 +189,13 @@ public class TestDFSHAAdminMiniCluster {
|
|||
tmpFile.deleteOnExit();
|
||||
if (Shell.WINDOWS) {
|
||||
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
||||
"shell(echo %target_nameserviceid%.%target_namenodeid% " +
|
||||
"%target_port% %dfs_ha_namenode_id% > " +
|
||||
"shell(echo %source_nameserviceid%.%source_namenodeid% " +
|
||||
"%source_port% %dfs_ha_namenode_id% > " +
|
||||
tmpFile.getAbsolutePath() + ")");
|
||||
} else {
|
||||
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
||||
"shell(echo -n $target_nameserviceid.$target_namenodeid " +
|
||||
"$target_port $dfs_ha_namenode_id > " +
|
||||
"shell(echo -n $source_nameserviceid.$source_namenodeid " +
|
||||
"$source_port $dfs_ha_namenode_id > " +
|
||||
tmpFile.getAbsolutePath() + ")");
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue