HDFS-15404. ShellCommandFencer should expose info about source. Contributed by Chen Liang.
This commit is contained in:
parent
736bed6d6d
commit
3833c616e0
|
@ -213,7 +213,7 @@ public class FailoverController {
|
||||||
|
|
||||||
// Fence fromSvc if it's required or forced by the user
|
// Fence fromSvc if it's required or forced by the user
|
||||||
if (tryFence) {
|
if (tryFence) {
|
||||||
if (!fromSvc.getFencer().fence(fromSvc)) {
|
if (!fromSvc.getFencer().fence(fromSvc, toSvc)) {
|
||||||
throw new FailoverFailedException("Unable to fence " +
|
throw new FailoverFailedException("Unable to fence " +
|
||||||
fromSvc + ". Fencing failed.");
|
fromSvc + ". Fencing failed.");
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,6 +44,12 @@ public abstract class HAServiceTarget {
|
||||||
private static final String PORT_SUBST_KEY = "port";
|
private static final String PORT_SUBST_KEY = "port";
|
||||||
private static final String ADDRESS_SUBST_KEY = "address";
|
private static final String ADDRESS_SUBST_KEY = "address";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The HAState this service target is intended to be after transition
|
||||||
|
* is complete.
|
||||||
|
*/
|
||||||
|
private HAServiceProtocol.HAServiceState transitionTargetHAStatus;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the IPC address of the target node.
|
* @return the IPC address of the target node.
|
||||||
*/
|
*/
|
||||||
|
@ -93,6 +99,15 @@ public abstract class HAServiceTarget {
|
||||||
return getProxyForAddress(conf, timeoutMs, getAddress());
|
return getProxyForAddress(conf, timeoutMs, getAddress());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setTransitionTargetHAStatus(
|
||||||
|
HAServiceProtocol.HAServiceState status) {
|
||||||
|
this.transitionTargetHAStatus = status;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HAServiceProtocol.HAServiceState getTransitionTargetHAStatus() {
|
||||||
|
return this.transitionTargetHAStatus;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a proxy to connect to the target HA service for health monitoring.
|
* Returns a proxy to connect to the target HA service for health monitoring.
|
||||||
* If {@link #getHealthMonitorAddress()} is implemented to return a non-null
|
* If {@link #getHealthMonitorAddress()} is implemented to return a non-null
|
||||||
|
|
|
@ -89,15 +89,32 @@ public class NodeFencer {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean fence(HAServiceTarget fromSvc) {
|
public boolean fence(HAServiceTarget fromSvc) {
|
||||||
|
return fence(fromSvc, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean fence(HAServiceTarget fromSvc, HAServiceTarget toSvc) {
|
||||||
LOG.info("====== Beginning Service Fencing Process... ======");
|
LOG.info("====== Beginning Service Fencing Process... ======");
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (FenceMethodWithArg method : methods) {
|
for (FenceMethodWithArg method : methods) {
|
||||||
LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method);
|
LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (method.method.tryFence(fromSvc, method.arg)) {
|
// only true when target node is given, AND fencing on it failed
|
||||||
LOG.info("====== Fencing successful by method " + method + " ======");
|
boolean toSvcFencingFailed = false;
|
||||||
return true;
|
// if target is given, try to fence on target first. Only if fencing
|
||||||
|
// on target succeeded, do fencing on source node.
|
||||||
|
if (toSvc != null) {
|
||||||
|
toSvcFencingFailed = !method.method.tryFence(toSvc, method.arg);
|
||||||
|
}
|
||||||
|
if (toSvcFencingFailed) {
|
||||||
|
LOG.error("====== Fencing on target failed, skipping fencing "
|
||||||
|
+ "on source ======");
|
||||||
|
} else {
|
||||||
|
if (method.method.tryFence(fromSvc, method.arg)) {
|
||||||
|
LOG.info("====== Fencing successful by method "
|
||||||
|
+ method + " ======");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (BadFencingConfigurationException e) {
|
} catch (BadFencingConfigurationException e) {
|
||||||
LOG.error("Fencing method " + method + " misconfigured", e);
|
LOG.error("Fencing method " + method + " misconfigured", e);
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.ha;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
|
@ -60,6 +61,11 @@ public class ShellCommandFencer
|
||||||
/** Prefix for target parameters added to the environment */
|
/** Prefix for target parameters added to the environment */
|
||||||
private static final String TARGET_PREFIX = "target_";
|
private static final String TARGET_PREFIX = "target_";
|
||||||
|
|
||||||
|
/** Prefix for source parameters added to the environment */
|
||||||
|
private static final String SOURCE_PREFIX = "source_";
|
||||||
|
|
||||||
|
private static final String ARG_DELIMITER = ",";
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class);
|
static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class);
|
||||||
|
|
||||||
|
@ -73,8 +79,9 @@ public class ShellCommandFencer
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean tryFence(HAServiceTarget target, String cmd) {
|
public boolean tryFence(HAServiceTarget target, String args) {
|
||||||
ProcessBuilder builder;
|
ProcessBuilder builder;
|
||||||
|
String cmd = parseArgs(target.getTransitionTargetHAStatus(), args);
|
||||||
|
|
||||||
if (!Shell.WINDOWS) {
|
if (!Shell.WINDOWS) {
|
||||||
builder = new ProcessBuilder("bash", "-e", "-c", cmd);
|
builder = new ProcessBuilder("bash", "-e", "-c", cmd);
|
||||||
|
@ -127,6 +134,28 @@ public class ShellCommandFencer
|
||||||
return rc == 0;
|
return rc == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String parseArgs(HAServiceProtocol.HAServiceState state,
|
||||||
|
String cmd) {
|
||||||
|
String[] args = cmd.split(ARG_DELIMITER);
|
||||||
|
if (args.length == 1) {
|
||||||
|
// only one command is given, assuming both src and dst
|
||||||
|
// will execute the same command/script.
|
||||||
|
return args[0];
|
||||||
|
}
|
||||||
|
if (args.length > 2) {
|
||||||
|
throw new IllegalArgumentException("Expecting arguments size of at most "
|
||||||
|
+ "two, getting " + Arrays.asList(args));
|
||||||
|
}
|
||||||
|
if (HAServiceProtocol.HAServiceState.ACTIVE.equals(state)) {
|
||||||
|
return args[0];
|
||||||
|
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(state)) {
|
||||||
|
return args[1];
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Unexpected HA service state:" + state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abbreviate a string by putting '...' in the middle of it,
|
* Abbreviate a string by putting '...' in the middle of it,
|
||||||
* in an attempt to keep logs from getting too messy.
|
* in an attempt to keep logs from getting too messy.
|
||||||
|
@ -190,9 +219,24 @@ public class ShellCommandFencer
|
||||||
*/
|
*/
|
||||||
private void addTargetInfoAsEnvVars(HAServiceTarget target,
|
private void addTargetInfoAsEnvVars(HAServiceTarget target,
|
||||||
Map<String, String> environment) {
|
Map<String, String> environment) {
|
||||||
|
String prefix;
|
||||||
|
HAServiceProtocol.HAServiceState targetState =
|
||||||
|
target.getTransitionTargetHAStatus();
|
||||||
|
if (targetState == null ||
|
||||||
|
HAServiceProtocol.HAServiceState.ACTIVE.equals(targetState)) {
|
||||||
|
// null is assumed to be same as ACTIVE, this is to be compatible
|
||||||
|
// with existing tests/use cases where target state is not specified
|
||||||
|
// but assuming it's active.
|
||||||
|
prefix = TARGET_PREFIX;
|
||||||
|
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(targetState)) {
|
||||||
|
prefix = SOURCE_PREFIX;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Unexpected HA service state:" + targetState);
|
||||||
|
}
|
||||||
for (Map.Entry<String, String> e :
|
for (Map.Entry<String, String> e :
|
||||||
target.getFencingParameters().entrySet()) {
|
target.getFencingParameters().entrySet()) {
|
||||||
String key = TARGET_PREFIX + e.getKey();
|
String key = prefix + e.getKey();
|
||||||
key = key.replace('.', '_');
|
key = key.replace('.', '_');
|
||||||
environment.put(key, e.getValue());
|
environment.put(key, e.getValue());
|
||||||
}
|
}
|
||||||
|
|
|
@ -177,7 +177,7 @@ public class TestFailoverController {
|
||||||
}
|
}
|
||||||
|
|
||||||
// svc1 still thinks it's active, that's OK, it was fenced
|
// svc1 still thinks it's active, that's OK, it was fenced
|
||||||
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
|
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
|
||||||
assertSame(svc1, AlwaysSucceedFencer.fencedSvc);
|
assertSame(svc1, AlwaysSucceedFencer.fencedSvc);
|
||||||
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
||||||
assertEquals(HAServiceState.ACTIVE, svc2.state);
|
assertEquals(HAServiceState.ACTIVE, svc2.state);
|
||||||
|
@ -201,7 +201,7 @@ public class TestFailoverController {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
||||||
assertSame(svc1, AlwaysFailFencer.fencedSvc);
|
assertSame(svc2, AlwaysFailFencer.fencedSvc);
|
||||||
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
||||||
assertEquals(HAServiceState.STANDBY, svc2.state);
|
assertEquals(HAServiceState.STANDBY, svc2.state);
|
||||||
}
|
}
|
||||||
|
@ -223,7 +223,7 @@ public class TestFailoverController {
|
||||||
// If fencing was requested and it failed we don't try to make
|
// If fencing was requested and it failed we don't try to make
|
||||||
// svc2 active anyway, and we don't failback to svc1.
|
// svc2 active anyway, and we don't failback to svc1.
|
||||||
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
||||||
assertSame(svc1, AlwaysFailFencer.fencedSvc);
|
assertSame(svc2, AlwaysFailFencer.fencedSvc);
|
||||||
assertEquals(HAServiceState.STANDBY, svc1.state);
|
assertEquals(HAServiceState.STANDBY, svc1.state);
|
||||||
assertEquals(HAServiceState.STANDBY, svc2.state);
|
assertEquals(HAServiceState.STANDBY, svc2.state);
|
||||||
}
|
}
|
||||||
|
@ -344,7 +344,7 @@ public class TestFailoverController {
|
||||||
// and we didn't force it, so we failed back to svc1 and fenced svc2.
|
// and we didn't force it, so we failed back to svc1 and fenced svc2.
|
||||||
// Note svc2 still thinks it's active, that's OK, we fenced it.
|
// Note svc2 still thinks it's active, that's OK, we fenced it.
|
||||||
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
assertEquals(HAServiceState.ACTIVE, svc1.state);
|
||||||
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
|
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
|
||||||
assertSame(svc2, AlwaysSucceedFencer.fencedSvc);
|
assertSame(svc2, AlwaysSucceedFencer.fencedSvc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -373,7 +373,7 @@ public class TestFailoverController {
|
||||||
// so we did not failback to svc1, ie it's still standby.
|
// so we did not failback to svc1, ie it's still standby.
|
||||||
assertEquals(HAServiceState.STANDBY, svc1.state);
|
assertEquals(HAServiceState.STANDBY, svc1.state);
|
||||||
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
assertEquals(1, AlwaysFailFencer.fenceCalled);
|
||||||
assertSame(svc2, AlwaysFailFencer.fencedSvc);
|
assertSame(svc1, AlwaysFailFencer.fencedSvc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -163,6 +163,37 @@ public class TestShellCommandFencer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test if fencing target has peer set, the failover can trigger different
|
||||||
|
* commands on source and destination respectively.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testEnvironmentWithPeer() {
|
||||||
|
HAServiceTarget target = new DummyHAService(HAServiceState.ACTIVE,
|
||||||
|
new InetSocketAddress("dummytarget", 1111));
|
||||||
|
HAServiceTarget source = new DummyHAService(HAServiceState.STANDBY,
|
||||||
|
new InetSocketAddress("dummysource", 2222));
|
||||||
|
target.setTransitionTargetHAStatus(HAServiceState.ACTIVE);
|
||||||
|
source.setTransitionTargetHAStatus(HAServiceState.STANDBY);
|
||||||
|
String cmd = "echo $target_host $target_port,"
|
||||||
|
+ "echo $source_host $source_port";
|
||||||
|
if (!Shell.WINDOWS) {
|
||||||
|
fencer.tryFence(target, cmd);
|
||||||
|
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||||
|
Mockito.contains("echo $ta...rget_port: dummytarget 1111"));
|
||||||
|
fencer.tryFence(source, cmd);
|
||||||
|
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||||
|
Mockito.contains("echo $so...urce_port: dummysource 2222"));
|
||||||
|
} else {
|
||||||
|
fencer.tryFence(target, cmd);
|
||||||
|
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||||
|
Mockito.contains("echo %ta...get_port%: dummytarget 1111"));
|
||||||
|
fencer.tryFence(source, cmd);
|
||||||
|
Mockito.verify(ShellCommandFencer.LOG).info(
|
||||||
|
Mockito.contains("echo %so...urce_port%: dummysource 2222"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that we properly close off our input to the subprocess
|
* Test that we properly close off our input to the subprocess
|
||||||
|
|
|
@ -284,6 +284,11 @@ public class DFSHAAdmin extends HAAdmin {
|
||||||
HAServiceTarget fromNode = resolveTarget(args[0]);
|
HAServiceTarget fromNode = resolveTarget(args[0]);
|
||||||
HAServiceTarget toNode = resolveTarget(args[1]);
|
HAServiceTarget toNode = resolveTarget(args[1]);
|
||||||
|
|
||||||
|
fromNode.setTransitionTargetHAStatus(
|
||||||
|
HAServiceProtocol.HAServiceState.STANDBY);
|
||||||
|
toNode.setTransitionTargetHAStatus(
|
||||||
|
HAServiceProtocol.HAServiceState.ACTIVE);
|
||||||
|
|
||||||
// Check that auto-failover is consistently configured for both nodes.
|
// Check that auto-failover is consistently configured for both nodes.
|
||||||
Preconditions.checkState(
|
Preconditions.checkState(
|
||||||
fromNode.isAutoFailoverEnabled() ==
|
fromNode.isAutoFailoverEnabled() ==
|
||||||
|
|
|
@ -189,13 +189,13 @@ public class TestDFSHAAdminMiniCluster {
|
||||||
tmpFile.deleteOnExit();
|
tmpFile.deleteOnExit();
|
||||||
if (Shell.WINDOWS) {
|
if (Shell.WINDOWS) {
|
||||||
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
||||||
"shell(echo %target_nameserviceid%.%target_namenodeid% " +
|
"shell(echo %source_nameserviceid%.%source_namenodeid% " +
|
||||||
"%target_port% %dfs_ha_namenode_id% > " +
|
"%source_port% %dfs_ha_namenode_id% > " +
|
||||||
tmpFile.getAbsolutePath() + ")");
|
tmpFile.getAbsolutePath() + ")");
|
||||||
} else {
|
} else {
|
||||||
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
||||||
"shell(echo -n $target_nameserviceid.$target_namenodeid " +
|
"shell(echo -n $source_nameserviceid.$source_namenodeid " +
|
||||||
"$target_port $dfs_ha_namenode_id > " +
|
"$source_port $dfs_ha_namenode_id > " +
|
||||||
tmpFile.getAbsolutePath() + ")");
|
tmpFile.getAbsolutePath() + ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue