HBASE-13199 Some small improvements on canary tool (Shaohui Liu)

This commit is contained in:
Liu Shaohui 2015-03-19 10:44:35 +08:00
parent 602e11cc74
commit cf7ef936d2
1 changed files with 266 additions and 157 deletions

View File

@ -24,10 +24,17 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -56,6 +63,8 @@ import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@ -119,6 +128,174 @@ public final class Canary implements Tool {
}
}
/**
* For each column family of the region tries to get one row and outputs the latency, or the
* failure.
*/
static class RegionTask implements Callable<Void> {
private Connection connection;
private HRegionInfo region;
private Sink sink;
RegionTask(Connection connection, HRegionInfo region, Sink sink) {
this.connection = connection;
this.region = region;
this.sink = sink;
}
@Override
public Void call() {
Table table = null;
HTableDescriptor tableDesc = null;
try {
table = connection.getTable(region.getTable());
tableDesc = table.getTableDescriptor();
} catch (IOException e) {
LOG.debug("sniffRegion failed", e);
sink.publishReadFailure(region, e);
return null;
} finally {
if (table != null) {
try {
table.close();
} catch (IOException e) {
}
}
}
byte[] startKey = null;
Get get = null;
Scan scan = null;
ResultScanner rs = null;
StopWatch stopWatch = new StopWatch();
for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
stopWatch.reset();
startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead.
if (startKey.length > 0) {
get = new Get(startKey);
get.setCacheBlocks(false);
get.setFilter(new FirstKeyOnlyFilter());
get.addFamily(column.getName());
} else {
scan = new Scan();
scan.setCaching(1);
scan.setCacheBlocks(false);
scan.setFilter(new FirstKeyOnlyFilter());
scan.addFamily(column.getName());
scan.setMaxResultSize(1L);
}
try {
if (startKey.length > 0) {
stopWatch.start();
table.get(get);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
} else {
stopWatch.start();
rs = table.getScanner(scan);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
}
} catch (Exception e) {
sink.publishReadFailure(region, column, e);
} finally {
if (rs != null) {
rs.close();
}
if (table != null) {
try {
table.close();
} catch (IOException e) {
}
}
scan = null;
get = null;
startKey = null;
}
}
return null;
}
}
/**
* Get one row from a region on the regionserver and outputs the latency, or the failure.
*/
static class RegionServerTask implements Callable<Void> {
private Connection connection;
private String serverName;
private HRegionInfo region;
private ExtendedSink sink;
RegionServerTask(Connection connection, String serverName, HRegionInfo region,
ExtendedSink sink) {
this.connection = connection;
this.serverName = serverName;
this.region = region;
this.sink = sink;
}
@Override
public Void call() {
TableName tableName = null;
Table table = null;
Get get = null;
byte[] startKey = null;
Scan scan = null;
StopWatch stopWatch = new StopWatch();
// monitor one region on every region server
stopWatch.reset();
try {
tableName = region.getTable();
table = connection.getTable(tableName);
startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead.
if (startKey.length > 0) {
get = new Get(startKey);
get.setCacheBlocks(false);
get.setFilter(new FirstKeyOnlyFilter());
stopWatch.start();
table.get(get);
stopWatch.stop();
} else {
scan = new Scan();
scan.setCacheBlocks(false);
scan.setFilter(new FirstKeyOnlyFilter());
scan.setCaching(1);
scan.setMaxResultSize(1L);
stopWatch.start();
ResultScanner s = table.getScanner(scan);
s.close();
stopWatch.stop();
}
sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime());
} catch (TableNotFoundException tnfe) {
// This is ignored because it doesn't imply that the regionserver is dead
} catch (TableNotEnabledException tnee) {
// This is considered a success since we got a response.
LOG.debug("The targeted table was disabled. Assuming success.");
} catch (DoNotRetryIOException dnrioe) {
sink.publishReadFailure(tableName.getNameAsString(), serverName);
LOG.error(dnrioe);
} catch (IOException e) {
sink.publishReadFailure(tableName.getNameAsString(), serverName);
LOG.error(e);
} finally {
if (table != null) {
try {
table.close();
} catch (IOException e) {/* DO NOTHING */
}
}
scan = null;
get = null;
startKey = null;
}
return null;
}
}
private static final int USAGE_EXIT_CODE = 1;
private static final int INIT_ERROR_EXIT_CODE = 2;
private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
@ -128,6 +305,8 @@ public final class Canary implements Tool {
private static final long DEFAULT_TIMEOUT = 600000; // 10 mins
private static final int MAX_THREADS_NUM = 16; // #threads to contact regions
private static final Log LOG = LogFactory.getLog(Canary.class);
private Configuration conf = null;
@ -138,12 +317,14 @@ public final class Canary implements Tool {
private long timeout = DEFAULT_TIMEOUT;
private boolean failOnError = true;
private boolean regionServerMode = false;
private ExecutorService executor; // threads to retrieve data from regionservers
public Canary() {
this(new RegionServerStdOutSink());
this(new ScheduledThreadPoolExecutor(1), new RegionServerStdOutSink());
}
public Canary(Sink sink) {
public Canary(ExecutorService executor, Sink sink) {
this.executor = executor;
this.sink = sink;
}
@ -326,13 +507,12 @@ public final class Canary implements Tool {
}
if (this.regionServerMode) {
monitor = new RegionServerMonitor(
connection,
monitorTargets,
this.useRegExp,
(ExtendedSink)this.sink);
monitor =
new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
(ExtendedSink) this.sink, this.executor);
} else {
monitor = new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink);
monitor =
new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor);
}
return monitor;
}
@ -349,6 +529,7 @@ public final class Canary implements Tool {
protected boolean done = false;
protected int errorCode = 0;
protected Sink sink;
protected ExecutorService executor;
public boolean isDone() {
return done;
@ -363,14 +544,15 @@ public final class Canary implements Tool {
if (this.admin != null) this.admin.close();
}
protected Monitor(Connection connection, String[] monitorTargets,
boolean useRegExp, Sink sink) {
protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
ExecutorService executor) {
if (null == connection) throw new IllegalArgumentException("connection shall not be null");
this.connection = connection;
this.targets = monitorTargets;
this.useRegExp = useRegExp;
this.sink = sink;
this.executor = executor;
}
public abstract void run();
@ -394,23 +576,31 @@ public final class Canary implements Tool {
// a monitor for region mode
private static class RegionMonitor extends Monitor {
public RegionMonitor(Connection connection, String[] monitorTargets,
boolean useRegExp, Sink sink) {
super(connection, monitorTargets, useRegExp, sink);
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
Sink sink, ExecutorService executor) {
super(connection, monitorTargets, useRegExp, sink, executor);
}
@Override
public void run() {
if (this.initAdmin()) {
try {
List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
if (this.targets != null && this.targets.length > 0) {
String[] tables = generateMonitorTables(this.targets);
this.initialized = true;
for (String table : tables) {
Canary.sniff(admin, sink, table);
taskFutures.addAll(Canary.sniff(admin, sink, table, executor));
}
} else {
sniff();
taskFutures.addAll(sniff());
}
for (Future<Void> future : taskFutures) {
try {
future.get();
} catch (ExecutionException e) {
LOG.error("Sniff region failed!", e);
}
}
} catch (Exception e) {
LOG.error("Run regionMonitor failed", e);
@ -445,8 +635,7 @@ public final class Canary implements Tool {
if (tmpTables.size() > 0) {
returnTables = tmpTables.toArray(new String[tmpTables.size()]);
} else {
String msg = "No HTable found, tablePattern:"
+ Arrays.toString(monitorTargets);
String msg = "No HTable found, tablePattern:" + Arrays.toString(monitorTargets);
LOG.error(msg);
this.errorCode = INIT_ERROR_EXIT_CODE;
throw new TableNotFoundException(msg);
@ -461,12 +650,15 @@ public final class Canary implements Tool {
/*
* canary entry point to monitor all the tables.
*/
private void sniff() throws Exception {
private List<Future<Void>> sniff() throws Exception {
List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
for (HTableDescriptor table : admin.listTables()) {
Canary.sniff(admin, sink, table);
if (admin.isTableEnabled(table.getTableName())) {
taskFutures.addAll(Canary.sniff(admin, sink, table, executor));
}
}
return taskFutures;
}
}
/**
@ -474,108 +666,56 @@ public final class Canary implements Tool {
* @throws Exception
*/
public static void sniff(final Admin admin, TableName tableName) throws Exception {
sniff(admin, new StdOutSink(), tableName.getNameAsString());
List<Future<Void>> taskFutures =
Canary.sniff(admin, new StdOutSink(), tableName.getNameAsString(),
new ScheduledThreadPoolExecutor(1));
for (Future<Void> future : taskFutures) {
future.get();
}
}
/**
* Canary entry point for specified table.
* @throws Exception
*/
private static void sniff(final Admin admin, final Sink sink, String tableName)
throws Exception {
if (admin.isTableAvailable(TableName.valueOf(tableName))) {
sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)));
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, String tableName,
ExecutorService executor) throws Exception {
if (admin.isTableEnabled(TableName.valueOf(tableName))) {
return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)),
executor);
} else {
LOG.warn(String.format("Table %s is not available", tableName));
LOG.warn(String.format("Table %s is not enabled", tableName));
}
return new LinkedList<Future<Void>>();
}
/*
* Loops over regions that owns this table, and output some information abouts the state.
*/
private static void sniff(final Admin admin, final Sink sink, HTableDescriptor tableDesc)
throws Exception {
private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
HTableDescriptor tableDesc, ExecutorService executor) throws Exception {
Table table = null;
try {
table = admin.getConnection().getTable(tableDesc.getTableName());
} catch (TableNotFoundException e) {
return;
return new ArrayList<Future<Void>>();
}
List<RegionTask> tasks = new ArrayList<RegionTask>();
try {
for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
try {
sniffRegion(admin, sink, region, table);
} catch (Exception e) {
sink.publishReadFailure(region, e);
LOG.debug("sniffRegion failed", e);
}
tasks.add(new RegionTask(admin.getConnection(), region, sink));
}
} finally {
table.close();
}
}
/*
* For each column family of the region tries to get one row and outputs the latency, or the
* failure.
*/
private static void sniffRegion(
final Admin admin,
final Sink sink,
HRegionInfo region,
Table table) throws Exception {
HTableDescriptor tableDesc = table.getTableDescriptor();
byte[] startKey = null;
Get get = null;
Scan scan = null;
ResultScanner rs = null;
StopWatch stopWatch = new StopWatch();
for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
stopWatch.reset();
startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead.
if (startKey.length > 0) {
get = new Get(startKey);
get.addFamily(column.getName());
} else {
scan = new Scan();
scan.setCaching(1);
scan.addFamily(column.getName());
scan.setMaxResultSize(1L);
}
try {
if (startKey.length > 0) {
stopWatch.start();
table.get(get);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
} else {
stopWatch.start();
rs = table.getScanner(scan);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
}
} catch (Exception e) {
sink.publishReadFailure(region, column, e);
} finally {
if (rs != null) {
rs.close();
}
scan = null;
get = null;
startKey = null;
}
}
return executor.invokeAll(tasks);
}
// a monitor for regionserver mode
private static class RegionServerMonitor extends Monitor {
public RegionServerMonitor(Connection connection, String[] monitorTargets,
boolean useRegExp, ExtendedSink sink) {
super(connection, monitorTargets, useRegExp, sink);
public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
ExtendedSink sink, ExecutorService executor) {
super(connection, monitorTargets, useRegExp, sink, executor);
}
private ExtendedSink getSink() {
@ -623,64 +763,27 @@ public final class Canary implements Tool {
}
private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
String serverName = null;
TableName tableName = null;
HRegionInfo region = null;
Table table = null;
Get get = null;
byte[] startKey = null;
Scan scan = null;
StopWatch stopWatch = new StopWatch();
List<RegionServerTask> tasks = new ArrayList<RegionServerTask>();
Random rand =new Random();
// monitor one region on every region server
for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
stopWatch.reset();
serverName = entry.getKey();
// always get the first region
region = entry.getValue().get(0);
try {
tableName = region.getTable();
table = admin.getConnection().getTable(tableName);
startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead.
if(startKey.length > 0) {
get = new Get(startKey);
stopWatch.start();
table.get(get);
stopWatch.stop();
} else {
scan = new Scan();
scan.setCaching(1);
scan.setMaxResultSize(1L);
stopWatch.start();
ResultScanner s = table.getScanner(scan);
s.close();
stopWatch.stop();
String serverName = entry.getKey();
// random select a region
HRegionInfo region = entry.getValue().get(rand.nextInt(entry.getValue().size()));
tasks.add(new RegionServerTask(this.connection, serverName, region, getSink()));
}
this.getSink().publishReadTiming(tableName.getNameAsString(),
serverName, stopWatch.getTime());
} catch (TableNotFoundException tnfe) {
// This is ignored because it doesn't imply that the regionserver is dead
} catch (TableNotEnabledException tnee) {
// This is considered a success since we got a response.
LOG.debug("The targeted table was disabled. Assuming success.");
} catch (DoNotRetryIOException dnrioe) {
this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
LOG.error(dnrioe);
} catch (IOException e) {
this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
LOG.error(e);
try {
for (Future<Void> future : this.executor.invokeAll(tasks)) {
try {
future.get();
} catch (ExecutionException e) {
LOG.error("Sniff regionserver failed!", e);
this.errorCode = ERROR_EXIT_CODE;
} finally {
if (table != null) {
try {
table.close();
} catch (IOException e) {/* DO NOTHING */
}
}
scan = null;
get = null;
startKey = null;
}
} catch (InterruptedException e) {
this.errorCode = ERROR_EXIT_CODE;
LOG.error("Sniff regionserver failed!", e);
}
}
@ -780,10 +883,16 @@ public final class Canary implements Tool {
if (authChore != null) {
choreService.scheduleChore(authChore);
}
int numThreads = conf.getInt("hbase.canary.threads.num", MAX_THREADS_NUM);
ExecutorService executor = new ScheduledThreadPoolExecutor(numThreads);
int exitCode = ToolRunner.run(conf, new Canary(), args);
Class<? extends Sink> sinkClass =
conf.getClass("hbase.canary.sink.class", StdOutSink.class, Sink.class);
Sink sink = ReflectionUtils.newInstance(sinkClass);
int exitCode = ToolRunner.run(conf, new Canary(executor, sink), args);
choreService.shutdown();
executor.shutdown();
System.exit(exitCode);
}
}