HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Signed-off-by: Josh Elser <elserj@apache.org>
This commit is contained in:
David Manning 2018-08-29 12:06:59 -07:00 committed by Josh Elser
parent dc79029966
commit 5cca61c4d0
2 changed files with 63 additions and 24 deletions

View File

@ -586,6 +586,7 @@ public final class Canary implements Tool {
private boolean failOnError = true; private boolean failOnError = true;
private boolean regionServerMode = false; private boolean regionServerMode = false;
private boolean zookeeperMode = false; private boolean zookeeperMode = false;
private long permittedFailures = 0;
private boolean regionServerAllRegions = false; private boolean regionServerAllRegions = false;
private boolean writeSniffing = false; private boolean writeSniffing = false;
private long configuredWriteTableTimeout = DEFAULT_TIMEOUT; private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
@ -729,6 +730,19 @@ public final class Canary implements Tool {
} }
this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal); this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
} }
} else if (cmd.equals("-permittedZookeeperFailures")) {
i++;
if (i == args.length) {
System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
printUsageAndExit();
}
try {
this.permittedFailures = Long.parseLong(args[i]);
} catch (NumberFormatException e) {
System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
printUsageAndExit();
}
} else { } else {
// no options match // no options match
System.err.println(cmd + " options is invalid."); System.err.println(cmd + " options is invalid.");
@ -750,6 +764,10 @@ public final class Canary implements Tool {
printUsageAndExit(); printUsageAndExit();
} }
} }
if (this.permittedFailures != 0 && !this.zookeeperMode) {
System.err.println("-permittedZookeeperFailures requires -zookeeper mode.");
printUsageAndExit();
}
if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) { if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
System.err.println("-readTableTimeouts can only be configured in region mode."); System.err.println("-readTableTimeouts can only be configured in region mode.");
printUsageAndExit(); printUsageAndExit();
@ -847,6 +865,8 @@ public final class Canary implements Tool {
System.err.println(" only works in regionserver mode."); System.err.println(" only works in regionserver mode.");
System.err.println(" -zookeeper Tries to grab zookeeper.znode.parent "); System.err.println(" -zookeeper Tries to grab zookeeper.znode.parent ");
System.err.println(" on each zookeeper instance"); System.err.println(" on each zookeeper instance");
System.err.println(" -permittedZookeeperFailures <N> Ignore first N failures when attempting to ");
System.err.println(" connect to individual zookeeper nodes in the ensemble");
System.err.println(" -daemon Continuous check at defined intervals."); System.err.println(" -daemon Continuous check at defined intervals.");
System.err.println(" -interval <N> Interval between checks (sec)"); System.err.println(" -interval <N> Interval between checks (sec)");
System.err.println(" -e Use table/regionserver as regular expression"); System.err.println(" -e Use table/regionserver as regular expression");
@ -889,17 +909,18 @@ public final class Canary implements Tool {
monitor = monitor =
new RegionServerMonitor(connection, monitorTargets, this.useRegExp, new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
(StdOutSink) this.sink, this.executor, this.regionServerAllRegions, (StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
this.treatFailureAsError); this.treatFailureAsError, this.permittedFailures);
} else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) { } else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
monitor = monitor =
new ZookeeperMonitor(connection, monitorTargets, this.useRegExp, new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
(StdOutSink) this.sink, this.executor, this.treatFailureAsError); (StdOutSink) this.sink, this.executor, this.treatFailureAsError,
this.permittedFailures);
} else { } else {
monitor = monitor =
new RegionMonitor(connection, monitorTargets, this.useRegExp, new RegionMonitor(connection, monitorTargets, this.useRegExp,
(StdOutSink) this.sink, this.executor, this.writeSniffing, (StdOutSink) this.sink, this.executor, this.writeSniffing,
this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts, this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
this.configuredWriteTableTimeout); this.configuredWriteTableTimeout, this.permittedFailures);
} }
return monitor; return monitor;
} }
@ -916,6 +937,7 @@ public final class Canary implements Tool {
protected boolean done = false; protected boolean done = false;
protected int errorCode = 0; protected int errorCode = 0;
protected long allowedFailures = 0;
protected Sink sink; protected Sink sink;
protected ExecutorService executor; protected ExecutorService executor;
@ -932,7 +954,8 @@ public final class Canary implements Tool {
return true; return true;
} }
if (treatFailureAsError && if (treatFailureAsError &&
(sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) { (sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) {
LOG.error("Too many failures detected, treating failure as error, failing the Canary.");
errorCode = FAILURE_EXIT_CODE; errorCode = FAILURE_EXIT_CODE;
return true; return true;
} }
@ -945,7 +968,7 @@ public final class Canary implements Tool {
} }
protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink, protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
ExecutorService executor, boolean treatFailureAsError) { ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
if (null == connection) throw new IllegalArgumentException("connection shall not be null"); if (null == connection) throw new IllegalArgumentException("connection shall not be null");
this.connection = connection; this.connection = connection;
@ -954,6 +977,7 @@ public final class Canary implements Tool {
this.treatFailureAsError = treatFailureAsError; this.treatFailureAsError = treatFailureAsError;
this.sink = sink; this.sink = sink;
this.executor = executor; this.executor = executor;
this.allowedFailures = allowedFailures;
} }
@Override @Override
@ -995,8 +1019,9 @@ public final class Canary implements Tool {
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName, StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout) { boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout,
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError); long allowedFailures) {
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
Configuration conf = connection.getConfiguration(); Configuration conf = connection.getConfiguration();
this.writeSniffing = writeSniffing; this.writeSniffing = writeSniffing;
this.writeTableName = writeTableName; this.writeTableName = writeTableName;
@ -1289,8 +1314,8 @@ public final class Canary implements Tool {
private final int timeout; private final int timeout;
protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
StdOutSink sink, ExecutorService executor, boolean treatFailureAsError) { StdOutSink sink, ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError); super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
Configuration configuration = connection.getConfiguration(); Configuration configuration = connection.getConfiguration();
znode = znode =
configuration.get(ZOOKEEPER_ZNODE_PARENT, configuration.get(ZOOKEEPER_ZNODE_PARENT,
@ -1303,6 +1328,11 @@ public final class Canary implements Tool {
for (InetSocketAddress server : parser.getServerAddresses()) { for (InetSocketAddress server : parser.getServerAddresses()) {
hosts.add(server.toString()); hosts.add(server.toString());
} }
if (allowedFailures > (hosts.size() - 1) / 2) {
LOG.warn("Confirm allowable number of failed ZooKeeper nodes, as quorum will " +
"already be lost. Setting of {} failures is unexpected for {} ensemble size.",
allowedFailures, hosts.size());
}
} }
@Override public void run() { @Override public void run() {
@ -1351,8 +1381,8 @@ public final class Canary implements Tool {
public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
StdOutSink sink, ExecutorService executor, boolean allRegions, StdOutSink sink, ExecutorService executor, boolean allRegions,
boolean treatFailureAsError) { boolean treatFailureAsError, long allowedFailures) {
super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError); super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
this.allRegions = allRegions; this.allRegions = allRegions;
} }

View File

@ -90,20 +90,14 @@ public class TestCanaryTool {
@Test @Test
public void testBasicZookeeperCanaryWorks() throws Exception { public void testBasicZookeeperCanaryWorks() throws Exception {
Integer port = final String[] args = { "-t", "10000", "-zookeeper" };
Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null); testZookeeperCanaryWithArgs(args);
testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM, }
"localhost:" + port + "/hbase");
ExecutorService executor = new ScheduledThreadPoolExecutor(2);
Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
Canary canary = new Canary(executor, sink);
String[] args = { "-t", "10000", "-zookeeper" };
assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
String baseZnode = testingUtility.getConfiguration() @Test
.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
verify(sink, atLeastOnce()) final String[] args = { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
.publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong()); testZookeeperCanaryWithArgs(args);
} }
@Test @Test
@ -250,4 +244,19 @@ public class TestCanaryTool {
assertEquals("verify no read error count", 0, canary.getReadFailures().size()); assertEquals("verify no read error count", 0, canary.getReadFailures().size());
} }
/**
 * Shared driver for the ZooKeeper-mode Canary tests.
 *
 * Sets the ZooKeeper quorum in the test configuration to {@code localhost:<port>/hbase},
 * where the port is taken from the test ZK cluster's client port list, runs a Canary backed
 * by a spied {@link Canary.ZookeeperStdOutSink} with the supplied arguments, and then checks
 * that the tool returned 0 and that a read timing was published at least once for the
 * configured parent znode on that host.
 *
 * @param args command-line arguments handed to the Canary through {@code ToolRunner.run}
 * @throws Exception propagated from running the tool
 */
private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
  // A null default is supplied for the case where the port list is empty.
  final Integer zkClientPort =
    Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
  final String zkHostAndPort = "localhost:" + zkClientPort;
  testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM, zkHostAndPort + "/hbase");

  final ExecutorService pool = new ScheduledThreadPoolExecutor(2);
  final Canary.ZookeeperStdOutSink spiedSink = spy(new Canary.ZookeeperStdOutSink());
  final Canary tool = new Canary(pool, spiedSink);

  final int exitCode = ToolRunner.run(testingUtility.getConfiguration(), tool, args);
  assertEquals(0, exitCode);

  // The parent znode is looked up after the run, from the same configuration the tool used.
  final String parentZnode = testingUtility.getConfiguration()
    .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
  verify(spiedSink, atLeastOnce())
    .publishReadTiming(eq(parentZnode), eq(zkHostAndPort), anyLong());
}
} }