HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary
Signed-off-by: Josh Elser <elserj@apache.org>
This commit is contained in:
parent
dc79029966
commit
5cca61c4d0
|
@ -586,6 +586,7 @@ public final class Canary implements Tool {
|
||||||
private boolean failOnError = true;
|
private boolean failOnError = true;
|
||||||
private boolean regionServerMode = false;
|
private boolean regionServerMode = false;
|
||||||
private boolean zookeeperMode = false;
|
private boolean zookeeperMode = false;
|
||||||
|
private long permittedFailures = 0;
|
||||||
private boolean regionServerAllRegions = false;
|
private boolean regionServerAllRegions = false;
|
||||||
private boolean writeSniffing = false;
|
private boolean writeSniffing = false;
|
||||||
private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
|
private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
|
||||||
|
@ -729,6 +730,19 @@ public final class Canary implements Tool {
|
||||||
}
|
}
|
||||||
this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
|
this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
|
||||||
}
|
}
|
||||||
|
} else if (cmd.equals("-permittedZookeeperFailures")) {
|
||||||
|
i++;
|
||||||
|
|
||||||
|
if (i == args.length) {
|
||||||
|
System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
|
||||||
|
printUsageAndExit();
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
this.permittedFailures = Long.parseLong(args[i]);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
|
||||||
|
printUsageAndExit();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// no options match
|
// no options match
|
||||||
System.err.println(cmd + " options is invalid.");
|
System.err.println(cmd + " options is invalid.");
|
||||||
|
@ -750,6 +764,10 @@ public final class Canary implements Tool {
|
||||||
printUsageAndExit();
|
printUsageAndExit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (this.permittedFailures != 0 && !this.zookeeperMode) {
|
||||||
|
System.err.println("-permittedZookeeperFailures requires -zookeeper mode.");
|
||||||
|
printUsageAndExit();
|
||||||
|
}
|
||||||
if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
|
if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
|
||||||
System.err.println("-readTableTimeouts can only be configured in region mode.");
|
System.err.println("-readTableTimeouts can only be configured in region mode.");
|
||||||
printUsageAndExit();
|
printUsageAndExit();
|
||||||
|
@ -847,6 +865,8 @@ public final class Canary implements Tool {
|
||||||
System.err.println(" only works in regionserver mode.");
|
System.err.println(" only works in regionserver mode.");
|
||||||
System.err.println(" -zookeeper Tries to grab zookeeper.znode.parent ");
|
System.err.println(" -zookeeper Tries to grab zookeeper.znode.parent ");
|
||||||
System.err.println(" on each zookeeper instance");
|
System.err.println(" on each zookeeper instance");
|
||||||
|
System.err.println(" -permittedZookeeperFailures <N> Ignore first N failures when attempting to ");
|
||||||
|
System.err.println(" connect to individual zookeeper nodes in the ensemble");
|
||||||
System.err.println(" -daemon Continuous check at defined intervals.");
|
System.err.println(" -daemon Continuous check at defined intervals.");
|
||||||
System.err.println(" -interval <N> Interval between checks (sec)");
|
System.err.println(" -interval <N> Interval between checks (sec)");
|
||||||
System.err.println(" -e Use table/regionserver as regular expression");
|
System.err.println(" -e Use table/regionserver as regular expression");
|
||||||
|
@ -889,17 +909,18 @@ public final class Canary implements Tool {
|
||||||
monitor =
|
monitor =
|
||||||
new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
|
new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
|
||||||
(StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
|
(StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
|
||||||
this.treatFailureAsError);
|
this.treatFailureAsError, this.permittedFailures);
|
||||||
} else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
|
} else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
|
||||||
monitor =
|
monitor =
|
||||||
new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
|
new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
|
||||||
(StdOutSink) this.sink, this.executor, this.treatFailureAsError);
|
(StdOutSink) this.sink, this.executor, this.treatFailureAsError,
|
||||||
|
this.permittedFailures);
|
||||||
} else {
|
} else {
|
||||||
monitor =
|
monitor =
|
||||||
new RegionMonitor(connection, monitorTargets, this.useRegExp,
|
new RegionMonitor(connection, monitorTargets, this.useRegExp,
|
||||||
(StdOutSink) this.sink, this.executor, this.writeSniffing,
|
(StdOutSink) this.sink, this.executor, this.writeSniffing,
|
||||||
this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
|
this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
|
||||||
this.configuredWriteTableTimeout);
|
this.configuredWriteTableTimeout, this.permittedFailures);
|
||||||
}
|
}
|
||||||
return monitor;
|
return monitor;
|
||||||
}
|
}
|
||||||
|
@ -916,6 +937,7 @@ public final class Canary implements Tool {
|
||||||
|
|
||||||
protected boolean done = false;
|
protected boolean done = false;
|
||||||
protected int errorCode = 0;
|
protected int errorCode = 0;
|
||||||
|
protected long allowedFailures = 0;
|
||||||
protected Sink sink;
|
protected Sink sink;
|
||||||
protected ExecutorService executor;
|
protected ExecutorService executor;
|
||||||
|
|
||||||
|
@ -932,7 +954,8 @@ public final class Canary implements Tool {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (treatFailureAsError &&
|
if (treatFailureAsError &&
|
||||||
(sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) {
|
(sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) {
|
||||||
|
LOG.error("Too many failures detected, treating failure as error, failing the Canary.");
|
||||||
errorCode = FAILURE_EXIT_CODE;
|
errorCode = FAILURE_EXIT_CODE;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -945,7 +968,7 @@ public final class Canary implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
|
protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
|
||||||
ExecutorService executor, boolean treatFailureAsError) {
|
ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
|
||||||
if (null == connection) throw new IllegalArgumentException("connection shall not be null");
|
if (null == connection) throw new IllegalArgumentException("connection shall not be null");
|
||||||
|
|
||||||
this.connection = connection;
|
this.connection = connection;
|
||||||
|
@ -954,6 +977,7 @@ public final class Canary implements Tool {
|
||||||
this.treatFailureAsError = treatFailureAsError;
|
this.treatFailureAsError = treatFailureAsError;
|
||||||
this.sink = sink;
|
this.sink = sink;
|
||||||
this.executor = executor;
|
this.executor = executor;
|
||||||
|
this.allowedFailures = allowedFailures;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -995,8 +1019,9 @@ public final class Canary implements Tool {
|
||||||
|
|
||||||
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
|
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
|
||||||
StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
|
StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
|
||||||
boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout) {
|
boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout,
|
||||||
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
|
long allowedFailures) {
|
||||||
|
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
|
||||||
Configuration conf = connection.getConfiguration();
|
Configuration conf = connection.getConfiguration();
|
||||||
this.writeSniffing = writeSniffing;
|
this.writeSniffing = writeSniffing;
|
||||||
this.writeTableName = writeTableName;
|
this.writeTableName = writeTableName;
|
||||||
|
@ -1289,8 +1314,8 @@ public final class Canary implements Tool {
|
||||||
private final int timeout;
|
private final int timeout;
|
||||||
|
|
||||||
protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
|
protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
|
||||||
StdOutSink sink, ExecutorService executor, boolean treatFailureAsError) {
|
StdOutSink sink, ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
|
||||||
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
|
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
|
||||||
Configuration configuration = connection.getConfiguration();
|
Configuration configuration = connection.getConfiguration();
|
||||||
znode =
|
znode =
|
||||||
configuration.get(ZOOKEEPER_ZNODE_PARENT,
|
configuration.get(ZOOKEEPER_ZNODE_PARENT,
|
||||||
|
@ -1303,6 +1328,11 @@ public final class Canary implements Tool {
|
||||||
for (InetSocketAddress server : parser.getServerAddresses()) {
|
for (InetSocketAddress server : parser.getServerAddresses()) {
|
||||||
hosts.add(server.toString());
|
hosts.add(server.toString());
|
||||||
}
|
}
|
||||||
|
if (allowedFailures > (hosts.size() - 1) / 2) {
|
||||||
|
LOG.warn("Confirm allowable number of failed ZooKeeper nodes, as quorum will " +
|
||||||
|
"already be lost. Setting of {} failures is unexpected for {} ensemble size.",
|
||||||
|
allowedFailures, hosts.size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public void run() {
|
@Override public void run() {
|
||||||
|
@ -1351,8 +1381,8 @@ public final class Canary implements Tool {
|
||||||
|
|
||||||
public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
|
public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
|
||||||
StdOutSink sink, ExecutorService executor, boolean allRegions,
|
StdOutSink sink, ExecutorService executor, boolean allRegions,
|
||||||
boolean treatFailureAsError) {
|
boolean treatFailureAsError, long allowedFailures) {
|
||||||
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
|
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
|
||||||
this.allRegions = allRegions;
|
this.allRegions = allRegions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -90,20 +90,14 @@ public class TestCanaryTool {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBasicZookeeperCanaryWorks() throws Exception {
|
public void testBasicZookeeperCanaryWorks() throws Exception {
|
||||||
Integer port =
|
final String[] args = { "-t", "10000", "-zookeeper" };
|
||||||
Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
|
testZookeeperCanaryWithArgs(args);
|
||||||
testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
|
}
|
||||||
"localhost:" + port + "/hbase");
|
|
||||||
ExecutorService executor = new ScheduledThreadPoolExecutor(2);
|
|
||||||
Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
|
|
||||||
Canary canary = new Canary(executor, sink);
|
|
||||||
String[] args = { "-t", "10000", "-zookeeper" };
|
|
||||||
assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
|
|
||||||
|
|
||||||
String baseZnode = testingUtility.getConfiguration()
|
@Test
|
||||||
.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
|
public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
|
||||||
verify(sink, atLeastOnce())
|
final String[] args = { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
|
||||||
.publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
|
testZookeeperCanaryWithArgs(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -250,4 +244,19 @@ public class TestCanaryTool {
|
||||||
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
|
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
|
||||||
|
Integer port =
|
||||||
|
Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
|
||||||
|
testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
|
||||||
|
"localhost:" + port + "/hbase");
|
||||||
|
ExecutorService executor = new ScheduledThreadPoolExecutor(2);
|
||||||
|
Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
|
||||||
|
Canary canary = new Canary(executor, sink);
|
||||||
|
assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
|
||||||
|
|
||||||
|
String baseZnode = testingUtility.getConfiguration()
|
||||||
|
.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
|
||||||
|
verify(sink, atLeastOnce())
|
||||||
|
.publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue