HDFS-15404. ShellCommandFencer should expose info about source. Contributed by Chen Liang.

This commit is contained in:
Chen Liang 2020-07-20 12:49:58 -07:00
parent 736bed6d6d
commit 3833c616e0
8 changed files with 127 additions and 15 deletions

View File

@ -213,7 +213,7 @@ public void failover(HAServiceTarget fromSvc,
// Fence fromSvc if it's required or forced by the user
if (tryFence) {
if (!fromSvc.getFencer().fence(fromSvc)) {
if (!fromSvc.getFencer().fence(fromSvc, toSvc)) {
throw new FailoverFailedException("Unable to fence " +
fromSvc + ". Fencing failed.");
}

View File

@ -44,6 +44,12 @@ public abstract class HAServiceTarget {
private static final String PORT_SUBST_KEY = "port";
private static final String ADDRESS_SUBST_KEY = "address";
/**
* The HAState this service target is intended to be after transition
* is complete.
*/
private HAServiceProtocol.HAServiceState transitionTargetHAStatus;
/**
* @return the IPC address of the target node.
*/
@ -93,6 +99,15 @@ public HAServiceProtocol getProxy(Configuration conf, int timeoutMs)
return getProxyForAddress(conf, timeoutMs, getAddress());
}
public void setTransitionTargetHAStatus(
HAServiceProtocol.HAServiceState status) {
this.transitionTargetHAStatus = status;
}
public HAServiceProtocol.HAServiceState getTransitionTargetHAStatus() {
return this.transitionTargetHAStatus;
}
/**
* Returns a proxy to connect to the target HA service for health monitoring.
* If {@link #getHealthMonitorAddress()} is implemented to return a non-null

View File

@ -89,15 +89,32 @@ public static NodeFencer create(Configuration conf, String confKey)
}
public boolean fence(HAServiceTarget fromSvc) {
return fence(fromSvc, null);
}
public boolean fence(HAServiceTarget fromSvc, HAServiceTarget toSvc) {
LOG.info("====== Beginning Service Fencing Process... ======");
int i = 0;
for (FenceMethodWithArg method : methods) {
LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method);
try {
if (method.method.tryFence(fromSvc, method.arg)) {
LOG.info("====== Fencing successful by method " + method + " ======");
return true;
// only true when target node is given, AND fencing on it failed
boolean toSvcFencingFailed = false;
// if target is given, try to fence on target first. Only if fencing
// on target succeeded, do fencing on source node.
if (toSvc != null) {
toSvcFencingFailed = !method.method.tryFence(toSvc, method.arg);
}
if (toSvcFencingFailed) {
LOG.error("====== Fencing on target failed, skipping fencing "
+ "on source ======");
} else {
if (method.method.tryFence(fromSvc, method.arg)) {
LOG.info("====== Fencing successful by method "
+ method + " ======");
return true;
}
}
} catch (BadFencingConfigurationException e) {
LOG.error("Fencing method " + method + " misconfigured", e);

View File

@ -19,6 +19,7 @@
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Map;
import org.apache.hadoop.conf.Configured;
@ -60,6 +61,11 @@ public class ShellCommandFencer
/** Prefix for target parameters added to the environment */
private static final String TARGET_PREFIX = "target_";
/** Prefix for source parameters added to the environment */
private static final String SOURCE_PREFIX = "source_";
private static final String ARG_DELIMITER = ",";
@VisibleForTesting
static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class);
@ -73,8 +79,9 @@ public void checkArgs(String args) throws BadFencingConfigurationException {
}
@Override
public boolean tryFence(HAServiceTarget target, String cmd) {
public boolean tryFence(HAServiceTarget target, String args) {
ProcessBuilder builder;
String cmd = parseArgs(target.getTransitionTargetHAStatus(), args);
if (!Shell.WINDOWS) {
builder = new ProcessBuilder("bash", "-e", "-c", cmd);
@ -127,6 +134,28 @@ public boolean tryFence(HAServiceTarget target, String cmd) {
return rc == 0;
}
private String parseArgs(HAServiceProtocol.HAServiceState state,
String cmd) {
String[] args = cmd.split(ARG_DELIMITER);
if (args.length == 1) {
// only one command is given, assuming both src and dst
// will execute the same command/script.
return args[0];
}
if (args.length > 2) {
throw new IllegalArgumentException("Expecting arguments size of at most "
+ "two, getting " + Arrays.asList(args));
}
if (HAServiceProtocol.HAServiceState.ACTIVE.equals(state)) {
return args[0];
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(state)) {
return args[1];
} else {
throw new IllegalArgumentException(
"Unexpected HA service state:" + state);
}
}
/**
* Abbreviate a string by putting '...' in the middle of it,
* in an attempt to keep logs from getting too messy.
@ -190,9 +219,24 @@ private void setConfAsEnvVars(Map<String, String> env) {
*/
private void addTargetInfoAsEnvVars(HAServiceTarget target,
Map<String, String> environment) {
String prefix;
HAServiceProtocol.HAServiceState targetState =
target.getTransitionTargetHAStatus();
if (targetState == null ||
HAServiceProtocol.HAServiceState.ACTIVE.equals(targetState)) {
// null is assumed to be same as ACTIVE, this is to be compatible
// with existing tests/use cases where target state is not specified
// but assuming it's active.
prefix = TARGET_PREFIX;
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(targetState)) {
prefix = SOURCE_PREFIX;
} else {
throw new IllegalArgumentException(
"Unexpected HA service state:" + targetState);
}
for (Map.Entry<String, String> e :
target.getFencingParameters().entrySet()) {
String key = TARGET_PREFIX + e.getKey();
String key = prefix + e.getKey();
key = key.replace('.', '_');
environment.put(key, e.getValue());
}

View File

@ -177,7 +177,7 @@ public void testFailoverFromFaultyServiceSucceeds() throws Exception {
}
// svc1 still thinks it's active, that's OK, it was fenced
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
assertSame(svc1, AlwaysSucceedFencer.fencedSvc);
assertEquals(HAServiceState.ACTIVE, svc1.state);
assertEquals(HAServiceState.ACTIVE, svc2.state);
@ -201,7 +201,7 @@ public void testFailoverFromFaultyServiceFencingFailure() throws Exception {
}
assertEquals(1, AlwaysFailFencer.fenceCalled);
assertSame(svc1, AlwaysFailFencer.fencedSvc);
assertSame(svc2, AlwaysFailFencer.fencedSvc);
assertEquals(HAServiceState.ACTIVE, svc1.state);
assertEquals(HAServiceState.STANDBY, svc2.state);
}
@ -223,7 +223,7 @@ public void testFencingFailureDuringFailover() throws Exception {
// If fencing was requested and it failed we don't try to make
// svc2 active anyway, and we don't failback to svc1.
assertEquals(1, AlwaysFailFencer.fenceCalled);
assertSame(svc1, AlwaysFailFencer.fencedSvc);
assertSame(svc2, AlwaysFailFencer.fencedSvc);
assertEquals(HAServiceState.STANDBY, svc1.state);
assertEquals(HAServiceState.STANDBY, svc2.state);
}
@ -344,7 +344,7 @@ public void testWeFenceOnFailbackIfTransitionToActiveFails() throws Exception {
// and we didn't force it, so we failed back to svc1 and fenced svc2.
// Note svc2 still thinks it's active, that's OK, we fenced it.
assertEquals(HAServiceState.ACTIVE, svc1.state);
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
assertSame(svc2, AlwaysSucceedFencer.fencedSvc);
}
@ -373,7 +373,7 @@ public void testFailureToFenceOnFailbackFailsTheFailback() throws Exception {
// so we did not failback to svc1, ie it's still standby.
assertEquals(HAServiceState.STANDBY, svc1.state);
assertEquals(1, AlwaysFailFencer.fenceCalled);
assertSame(svc2, AlwaysFailFencer.fencedSvc);
assertSame(svc1, AlwaysFailFencer.fencedSvc);
}
@Test

View File

@ -163,6 +163,37 @@ public void testTargetAsEnvironment() {
}
}
/**
* Test if fencing target has peer set, the failover can trigger different
* commands on source and destination respectively.
*/
@Test
public void testEnvironmentWithPeer() {
HAServiceTarget target = new DummyHAService(HAServiceState.ACTIVE,
new InetSocketAddress("dummytarget", 1111));
HAServiceTarget source = new DummyHAService(HAServiceState.STANDBY,
new InetSocketAddress("dummysource", 2222));
target.setTransitionTargetHAStatus(HAServiceState.ACTIVE);
source.setTransitionTargetHAStatus(HAServiceState.STANDBY);
String cmd = "echo $target_host $target_port,"
+ "echo $source_host $source_port";
if (!Shell.WINDOWS) {
fencer.tryFence(target, cmd);
Mockito.verify(ShellCommandFencer.LOG).info(
Mockito.contains("echo $ta...rget_port: dummytarget 1111"));
fencer.tryFence(source, cmd);
Mockito.verify(ShellCommandFencer.LOG).info(
Mockito.contains("echo $so...urce_port: dummysource 2222"));
} else {
fencer.tryFence(target, cmd);
Mockito.verify(ShellCommandFencer.LOG).info(
Mockito.contains("echo %ta...get_port%: dummytarget 1111"));
fencer.tryFence(source, cmd);
Mockito.verify(ShellCommandFencer.LOG).info(
Mockito.contains("echo %so...urce_port%: dummysource 2222"));
}
}
/**
* Test that we properly close off our input to the subprocess

View File

@ -284,6 +284,11 @@ private int failover(CommandLine cmd)
HAServiceTarget fromNode = resolveTarget(args[0]);
HAServiceTarget toNode = resolveTarget(args[1]);
fromNode.setTransitionTargetHAStatus(
HAServiceProtocol.HAServiceState.STANDBY);
toNode.setTransitionTargetHAStatus(
HAServiceProtocol.HAServiceState.ACTIVE);
// Check that auto-failover is consistently configured for both nodes.
Preconditions.checkState(
fromNode.isAutoFailoverEnabled() ==

View File

@ -189,13 +189,13 @@ public void testFencer() throws Exception {
tmpFile.deleteOnExit();
if (Shell.WINDOWS) {
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
"shell(echo %target_nameserviceid%.%target_namenodeid% " +
"%target_port% %dfs_ha_namenode_id% > " +
"shell(echo %source_nameserviceid%.%source_namenodeid% " +
"%source_port% %dfs_ha_namenode_id% > " +
tmpFile.getAbsolutePath() + ")");
} else {
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
"shell(echo -n $target_nameserviceid.$target_namenodeid " +
"$target_port $dfs_ha_namenode_id > " +
"shell(echo -n $source_nameserviceid.$source_namenodeid " +
"$source_port $dfs_ha_namenode_id > " +
tmpFile.getAbsolutePath() + ")");
}