HBASE-15066 Small improvements to Canary tool

This commit is contained in:
Enis Soztutar 2016-01-06 02:47:19 -08:00
parent 073e00c005
commit 893a54c3a4
3 changed files with 72 additions and 24 deletions

View File

@ -98,6 +98,7 @@ if [ $# = 0 ]; then
echo " mapredcp Dump CLASSPATH entries required by mapreduce"
echo " pe Run PerformanceEvaluation"
echo " ltt Run LoadTestTool"
echo " canary Run the Canary tool"
echo " version Print the version"
echo " CLASSNAME Run the class named CLASSNAME"
exit 1
@ -368,6 +369,9 @@ elif [ "$COMMAND" = "pe" ] ; then
elif [ "$COMMAND" = "ltt" ] ; then
CLASS='org.apache.hadoop.hbase.util.LoadTestTool'
HBASE_OPTS="$HBASE_OPTS $HBASE_LTT_OPTS"
elif [ "$COMMAND" = "canary" ] ; then
CLASS='org.apache.hadoop.hbase.tool.Canary'
HBASE_OPTS="$HBASE_OPTS $HBASE_CANARY_OPTS"
elif [ "$COMMAND" = "version" ] ; then
CLASS='org.apache.hadoop.hbase.util.VersionInfo'
else

View File

@ -195,6 +195,10 @@ public final class Canary implements Tool {
Table table = null;
HTableDescriptor tableDesc = null;
try {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading table descriptor for table %s",
region.getTable()));
}
table = connection.getTable(region.getTable());
tableDesc = table.getTableDescriptor();
} catch (IOException e) {
@ -232,20 +236,24 @@ public final class Canary implements Tool {
scan.setFilter(new FirstKeyOnlyFilter());
scan.addFamily(column.getName());
scan.setMaxResultSize(1L);
scan.setSmall(true);
}
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading from table %s region %s column family %s and key %s",
tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(),
Bytes.toStringBinary(startKey)));
}
try {
stopWatch.start();
if (startKey.length > 0) {
stopWatch.start();
table.get(get);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
} else {
stopWatch.start();
rs = table.getScanner(scan);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
rs.next();
}
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
} catch (Exception e) {
sink.publishReadFailure(region, column, e);
} finally {
@ -286,6 +294,12 @@ public final class Canary implements Tool {
byte[] value = new byte[writeValueSize];
Bytes.random(value);
put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("writing to table %s region %s column family %s and key %s",
tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(),
Bytes.toStringBinary(rowToCheck)));
}
try {
long startTime = System.currentTimeMillis();
table.put(put);
@ -337,6 +351,11 @@ public final class Canary implements Tool {
table = connection.getTable(tableName);
startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead.
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading from region server %s table %s region %s and key %s",
serverName, region.getTable(), region.getRegionNameAsString(),
Bytes.toStringBinary(startKey)));
}
if (startKey.length > 0) {
get = new Get(startKey);
get.setCacheBlocks(false);
@ -350,8 +369,10 @@ public final class Canary implements Tool {
scan.setFilter(new FirstKeyOnlyFilter());
scan.setCaching(1);
scan.setMaxResultSize(1L);
scan.setSmall(true);
stopWatch.start();
ResultScanner s = table.getScanner(scan);
s.next();
s.close();
stopWatch.stop();
}
@ -547,8 +568,6 @@ public final class Canary implements Tool {
long startTime = 0;
long currentTimeLength = 0;
// Get a connection to use in below.
// try-with-resources jdk7 construct. See
// http://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
try (Connection connection = ConnectionFactory.createConnection(this.conf)) {
do {
// Do monitor !!
@ -613,8 +632,8 @@ public final class Canary implements Tool {
System.err.println(" only works in regionserver mode.");
System.err.println(" -daemon Continuous check at defined intervals.");
System.err.println(" -interval <N> Interval between checks (sec)");
System.err.println(" -e Use region/regionserver as regular expression");
System.err.println(" which means the region/regionserver is regular expression pattern");
System.err.println(" -e Use table/regionserver as regular expression");
System.err.println(" which means the table/regionserver is regular expression pattern");
System.err.println(" -f <B> stop whole program if first error occurs," +
" default is true");
System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
@ -691,6 +710,7 @@ public final class Canary implements Tool {
this.executor = executor;
}
@Override
public abstract void run();
protected boolean initAdmin() {
@ -793,11 +813,17 @@ public final class Canary implements Tool {
HTableDescriptor[] tds = null;
Set<String> tmpTables = new TreeSet<String>();
try {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables"));
}
tds = this.admin.listTables(pattern);
if (tds == null) {
tds = new HTableDescriptor[0];
}
for (String monitorTarget : monitorTargets) {
pattern = Pattern.compile(monitorTarget);
tds = this.admin.listTables(pattern);
if (tds != null) {
for (HTableDescriptor td : tds) {
for (HTableDescriptor td : tds) {
if (pattern.matcher(td.getNameAsString()).matches()) {
tmpTables.add(td.getNameAsString());
}
}
@ -826,6 +852,9 @@ public final class Canary implements Tool {
* canary entry point to monitor all the tables.
*/
private List<Future<Void>> sniff(TaskType taskType) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables"));
}
List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
for (HTableDescriptor table : admin.listTables()) {
if (admin.isTableEnabled(table.getTableName())
@ -873,7 +902,7 @@ public final class Canary implements Tool {
int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
LOG.info("Number of live regionservers: " + numberOfServers + ", "
+ "pre-splitting the canary table into " + numberOfRegions + " regions "
+ "(current lower limi of regions per server is " + regionsLowerLimit
+ "(current lower limit of regions per server is " + regionsLowerLimit
+ " and you can change it by config: "
+ HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
HTableDescriptor desc = new HTableDescriptor(writeTableName);
@ -916,6 +945,10 @@ public final class Canary implements Tool {
*/
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, String tableName,
ExecutorService executor, TaskType taskType) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("checking table is enabled and getting table descriptor for table %s",
tableName));
}
if (admin.isTableEnabled(TableName.valueOf(tableName))) {
return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)),
executor, taskType);
@ -930,6 +963,11 @@ public final class Canary implements Tool {
*/
private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of regions for table %s", tableDesc.getTableName()));
}
Table table = null;
try {
table = admin.getConnection().getTable(tableDesc.getTableName());
@ -975,6 +1013,9 @@ public final class Canary implements Tool {
List<String> foundTableNames = new ArrayList<String>();
TableName[] tableNames = null;
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables"));
}
try {
tableNames = this.admin.listTableNames();
} catch (IOException e) {
@ -1060,6 +1101,9 @@ public final class Canary implements Tool {
Table table = null;
RegionLocator regionLocator = null;
try {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables and locations"));
}
HTableDescriptor[] tableDescs = this.admin.listTables();
List<HRegionInfo> regions = null;
for (HTableDescriptor tableDesc : tableDescs) {

View File

@ -79,7 +79,7 @@ There is a Canary class can help users to canary-test the HBase cluster status,
To see the usage, use the `--help` parameter.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -help
$ ${HBASE_HOME}/bin/hbase canary -help
Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]
where [opts] are:
@ -126,7 +126,7 @@ Following are some examples based on the previous given case.
==== Canary test for every column family (store) of every region of every table
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary
$ ${HBASE_HOME}/bin/hbase canary
13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf1 in 2ms
13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf2 in 2ms
@ -147,7 +147,7 @@ This is a default behavior of the this tool does.
You can also test one or more specific tables.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary test-01 test-02
$ ${HBASE_HOME}/bin/hbase canary test-01 test-02
----
==== Canary test with RegionServer granularity
@ -155,7 +155,7 @@ $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary test-01 test-02
This will pick one small piece of data from each RegionServer. You can also pass RegionServer names as arguments to canary-test specific RegionServers.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -regionserver
$ ${HBASE_HOME}/bin/hbase canary -regionserver
13/12/09 06:05:17 INFO tool.Canary: Read from table:test-01 on region server:rs2 in 72ms
13/12/09 06:05:17 INFO tool.Canary: Read from table:test-02 on region server:rs3 in 34ms
@ -167,7 +167,7 @@ $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -regionserver
This will test both table test-01 and test-02.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -e test-0[1-2]
$ ${HBASE_HOME}/bin/hbase canary -e test-0[1-2]
----
==== Run canary test as daemon mode
@ -176,13 +176,13 @@ Run repeatedly with interval defined in option `-interval` whose default value i
This daemon will stop itself and return a non-zero error code if any error occurs, because the default value of option -f is true.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon
$ ${HBASE_HOME}/bin/hbase canary -daemon
----
Run repeatedly with an interval of 5 seconds; the daemon will not stop itself even if errors occur in the test.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon -interval 50000 -f false
$ ${HBASE_HOME}/bin/hbase canary -daemon -interval 50000 -f false
----
==== Force timeout if canary test stuck
@ -192,7 +192,7 @@ Because of this we provide a timeout option to kill the canary test and return a
This run sets the timeout value to 60 seconds; the default value is 600 seconds.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000
$ ${HBASE_HOME}/bin/hbase canary -t 600000
----
==== Enable write sniffing in canary
@ -203,12 +203,12 @@ When the write sniffing is enabled, the canary tool will create an hbase table a
regions of the table distributed on all region servers. In each sniffing period, the canary will
try to put data to these regions to check the write availability of each region server.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing
$ ${HBASE_HOME}/bin/hbase canary -writeSniffing
----
The default write table is `hbase:canary` and can be specified by the option `-writeTable`.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing -writeTable ns:canary
$ ${HBASE_HOME}/bin/hbase canary -writeSniffing -writeTable ns:canary
----
The default value size of each put is 10 bytes and you can set it by the config key: