HBASE-15066 Small improvements to Canary tool

This commit is contained in:
Enis Soztutar 2016-01-06 02:47:19 -08:00
parent 073e00c005
commit 893a54c3a4
3 changed files with 72 additions and 24 deletions

View File

@ -98,6 +98,7 @@ if [ $# = 0 ]; then
echo " mapredcp Dump CLASSPATH entries required by mapreduce" echo " mapredcp Dump CLASSPATH entries required by mapreduce"
echo " pe Run PerformanceEvaluation" echo " pe Run PerformanceEvaluation"
echo " ltt Run LoadTestTool" echo " ltt Run LoadTestTool"
echo " canary Run the Canary tool"
echo " version Print the version" echo " version Print the version"
echo " CLASSNAME Run the class named CLASSNAME" echo " CLASSNAME Run the class named CLASSNAME"
exit 1 exit 1
@ -368,6 +369,9 @@ elif [ "$COMMAND" = "pe" ] ; then
elif [ "$COMMAND" = "ltt" ] ; then elif [ "$COMMAND" = "ltt" ] ; then
CLASS='org.apache.hadoop.hbase.util.LoadTestTool' CLASS='org.apache.hadoop.hbase.util.LoadTestTool'
HBASE_OPTS="$HBASE_OPTS $HBASE_LTT_OPTS" HBASE_OPTS="$HBASE_OPTS $HBASE_LTT_OPTS"
elif [ "$COMMAND" = "canary" ] ; then
CLASS='org.apache.hadoop.hbase.tool.Canary'
HBASE_OPTS="$HBASE_OPTS $HBASE_CANARY_OPTS"
elif [ "$COMMAND" = "version" ] ; then elif [ "$COMMAND" = "version" ] ; then
CLASS='org.apache.hadoop.hbase.util.VersionInfo' CLASS='org.apache.hadoop.hbase.util.VersionInfo'
else else

View File

@ -195,6 +195,10 @@ public final class Canary implements Tool {
Table table = null; Table table = null;
HTableDescriptor tableDesc = null; HTableDescriptor tableDesc = null;
try { try {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading table descriptor for table %s",
region.getTable()));
}
table = connection.getTable(region.getTable()); table = connection.getTable(region.getTable());
tableDesc = table.getTableDescriptor(); tableDesc = table.getTableDescriptor();
} catch (IOException e) { } catch (IOException e) {
@ -232,20 +236,24 @@ public final class Canary implements Tool {
scan.setFilter(new FirstKeyOnlyFilter()); scan.setFilter(new FirstKeyOnlyFilter());
scan.addFamily(column.getName()); scan.addFamily(column.getName());
scan.setMaxResultSize(1L); scan.setMaxResultSize(1L);
scan.setSmall(true);
} }
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading from table %s region %s column family %s and key %s",
tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(),
Bytes.toStringBinary(startKey)));
}
try { try {
stopWatch.start();
if (startKey.length > 0) { if (startKey.length > 0) {
stopWatch.start();
table.get(get); table.get(get);
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
} else { } else {
stopWatch.start();
rs = table.getScanner(scan); rs = table.getScanner(scan);
stopWatch.stop(); rs.next();
sink.publishReadTiming(region, column, stopWatch.getTime());
} }
stopWatch.stop();
sink.publishReadTiming(region, column, stopWatch.getTime());
} catch (Exception e) { } catch (Exception e) {
sink.publishReadFailure(region, column, e); sink.publishReadFailure(region, column, e);
} finally { } finally {
@ -286,6 +294,12 @@ public final class Canary implements Tool {
byte[] value = new byte[writeValueSize]; byte[] value = new byte[writeValueSize];
Bytes.random(value); Bytes.random(value);
put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value); put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("writing to table %s region %s column family %s and key %s",
tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(),
Bytes.toStringBinary(rowToCheck)));
}
try { try {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
table.put(put); table.put(put);
@ -337,6 +351,11 @@ public final class Canary implements Tool {
table = connection.getTable(tableName); table = connection.getTable(tableName);
startKey = region.getStartKey(); startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead. // Can't do a get on empty start row so do a Scan of first element if any instead.
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading from region server %s table %s region %s and key %s",
serverName, region.getTable(), region.getRegionNameAsString(),
Bytes.toStringBinary(startKey)));
}
if (startKey.length > 0) { if (startKey.length > 0) {
get = new Get(startKey); get = new Get(startKey);
get.setCacheBlocks(false); get.setCacheBlocks(false);
@ -350,8 +369,10 @@ public final class Canary implements Tool {
scan.setFilter(new FirstKeyOnlyFilter()); scan.setFilter(new FirstKeyOnlyFilter());
scan.setCaching(1); scan.setCaching(1);
scan.setMaxResultSize(1L); scan.setMaxResultSize(1L);
scan.setSmall(true);
stopWatch.start(); stopWatch.start();
ResultScanner s = table.getScanner(scan); ResultScanner s = table.getScanner(scan);
s.next();
s.close(); s.close();
stopWatch.stop(); stopWatch.stop();
} }
@ -547,8 +568,6 @@ public final class Canary implements Tool {
long startTime = 0; long startTime = 0;
long currentTimeLength = 0; long currentTimeLength = 0;
// Get a connection to use in below. // Get a connection to use in below.
// try-with-resources jdk7 construct. See
// http://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
try (Connection connection = ConnectionFactory.createConnection(this.conf)) { try (Connection connection = ConnectionFactory.createConnection(this.conf)) {
do { do {
// Do monitor !! // Do monitor !!
@ -613,8 +632,8 @@ public final class Canary implements Tool {
System.err.println(" only works in regionserver mode."); System.err.println(" only works in regionserver mode.");
System.err.println(" -daemon Continuous check at defined intervals."); System.err.println(" -daemon Continuous check at defined intervals.");
System.err.println(" -interval <N> Interval between checks (sec)"); System.err.println(" -interval <N> Interval between checks (sec)");
System.err.println(" -e Use region/regionserver as regular expression"); System.err.println(" -e Use table/regionserver as regular expression");
System.err.println(" which means the region/regionserver is regular expression pattern"); System.err.println(" which means the table/regionserver is regular expression pattern");
System.err.println(" -f <B> stop whole program if first error occurs," + System.err.println(" -f <B> stop whole program if first error occurs," +
" default is true"); " default is true");
System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)"); System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
@ -691,6 +710,7 @@ public final class Canary implements Tool {
this.executor = executor; this.executor = executor;
} }
@Override
public abstract void run(); public abstract void run();
protected boolean initAdmin() { protected boolean initAdmin() {
@ -793,11 +813,17 @@ public final class Canary implements Tool {
HTableDescriptor[] tds = null; HTableDescriptor[] tds = null;
Set<String> tmpTables = new TreeSet<String>(); Set<String> tmpTables = new TreeSet<String>();
try { try {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables"));
}
tds = this.admin.listTables(pattern);
if (tds == null) {
tds = new HTableDescriptor[0];
}
for (String monitorTarget : monitorTargets) { for (String monitorTarget : monitorTargets) {
pattern = Pattern.compile(monitorTarget); pattern = Pattern.compile(monitorTarget);
tds = this.admin.listTables(pattern); for (HTableDescriptor td : tds) {
if (tds != null) { if (pattern.matcher(td.getNameAsString()).matches()) {
for (HTableDescriptor td : tds) {
tmpTables.add(td.getNameAsString()); tmpTables.add(td.getNameAsString());
} }
} }
@ -826,6 +852,9 @@ public final class Canary implements Tool {
* canary entry point to monitor all the tables. * canary entry point to monitor all the tables.
*/ */
private List<Future<Void>> sniff(TaskType taskType) throws Exception { private List<Future<Void>> sniff(TaskType taskType) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables"));
}
List<Future<Void>> taskFutures = new LinkedList<Future<Void>>(); List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
for (HTableDescriptor table : admin.listTables()) { for (HTableDescriptor table : admin.listTables()) {
if (admin.isTableEnabled(table.getTableName()) if (admin.isTableEnabled(table.getTableName())
@ -873,7 +902,7 @@ public final class Canary implements Tool {
int numberOfRegions = (int)(numberOfServers * regionsLowerLimit); int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
LOG.info("Number of live regionservers: " + numberOfServers + ", " LOG.info("Number of live regionservers: " + numberOfServers + ", "
+ "pre-splitting the canary table into " + numberOfRegions + " regions " + "pre-splitting the canary table into " + numberOfRegions + " regions "
+ "(current lower limi of regions per server is " + regionsLowerLimit + "(current lower limit of regions per server is " + regionsLowerLimit
+ " and you can change it by config: " + " and you can change it by config: "
+ HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )"); + HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
HTableDescriptor desc = new HTableDescriptor(writeTableName); HTableDescriptor desc = new HTableDescriptor(writeTableName);
@ -916,6 +945,10 @@ public final class Canary implements Tool {
*/ */
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, String tableName, private static List<Future<Void>> sniff(final Admin admin, final Sink sink, String tableName,
ExecutorService executor, TaskType taskType) throws Exception { ExecutorService executor, TaskType taskType) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("checking table is enabled and getting table descriptor for table %s",
tableName));
}
if (admin.isTableEnabled(TableName.valueOf(tableName))) { if (admin.isTableEnabled(TableName.valueOf(tableName))) {
return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)), return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)),
executor, taskType); executor, taskType);
@ -930,6 +963,11 @@ public final class Canary implements Tool {
*/ */
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType) throws Exception { HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of regions for table %s", tableDesc.getTableName()));
}
Table table = null; Table table = null;
try { try {
table = admin.getConnection().getTable(tableDesc.getTableName()); table = admin.getConnection().getTable(tableDesc.getTableName());
@ -975,6 +1013,9 @@ public final class Canary implements Tool {
List<String> foundTableNames = new ArrayList<String>(); List<String> foundTableNames = new ArrayList<String>();
TableName[] tableNames = null; TableName[] tableNames = null;
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables"));
}
try { try {
tableNames = this.admin.listTableNames(); tableNames = this.admin.listTableNames();
} catch (IOException e) { } catch (IOException e) {
@ -1060,6 +1101,9 @@ public final class Canary implements Tool {
Table table = null; Table table = null;
RegionLocator regionLocator = null; RegionLocator regionLocator = null;
try { try {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("reading list of tables and locations"));
}
HTableDescriptor[] tableDescs = this.admin.listTables(); HTableDescriptor[] tableDescs = this.admin.listTables();
List<HRegionInfo> regions = null; List<HRegionInfo> regions = null;
for (HTableDescriptor tableDesc : tableDescs) { for (HTableDescriptor tableDesc : tableDescs) {

View File

@ -79,7 +79,7 @@ There is a Canary class can help users to canary-test the HBase cluster status,
To see the usage, use the `--help` parameter. To see the usage, use the `--help` parameter.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -help $ ${HBASE_HOME}/bin/hbase canary -help
Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..] Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]
where [opts] are: where [opts] are:
@ -126,7 +126,7 @@ Following are some examples based on the previous given case.
==== Canary test for every column family (store) of every region of every table ==== Canary test for every column family (store) of every region of every table
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary $ ${HBASE_HOME}/bin/hbase canary
13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf1 in 2ms 13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf1 in 2ms
13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf2 in 2ms 13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf2 in 2ms
@ -147,7 +147,7 @@ This is a default behavior of the this tool does.
You can also test one or more specific tables. You can also test one or more specific tables.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary test-01 test-02 $ ${HBASE_HOME}/bin/hbase canary test-01 test-02
---- ----
==== Canary test with RegionServer granularity ==== Canary test with RegionServer granularity
@ -155,7 +155,7 @@ $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary test-01 test-02
This will pick one small piece of data from each RegionServer. You can also pass RegionServer names as arguments to canary-test specific RegionServers. This will pick one small piece of data from each RegionServer. You can also pass RegionServer names as arguments to canary-test specific RegionServers.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -regionserver $ ${HBASE_HOME}/bin/hbase canary -regionserver
13/12/09 06:05:17 INFO tool.Canary: Read from table:test-01 on region server:rs2 in 72ms 13/12/09 06:05:17 INFO tool.Canary: Read from table:test-01 on region server:rs2 in 72ms
13/12/09 06:05:17 INFO tool.Canary: Read from table:test-02 on region server:rs3 in 34ms 13/12/09 06:05:17 INFO tool.Canary: Read from table:test-02 on region server:rs3 in 34ms
@ -167,7 +167,7 @@ $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -regionserver
This will test both table test-01 and test-02. This will test both table test-01 and test-02.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -e test-0[1-2] $ ${HBASE_HOME}/bin/hbase canary -e test-0[1-2]
---- ----
==== Run canary test as daemon mode ==== Run canary test as daemon mode
@ -176,13 +176,13 @@ Run repeatedly with interval defined in option `-interval` whose default value i
This daemon will stop itself and return a non-zero error code if any error occurs, because the default value of option -f is true. This daemon will stop itself and return a non-zero error code if any error occurs, because the default value of option -f is true.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon $ ${HBASE_HOME}/bin/hbase canary -daemon
---- ----
Run repeatedly with an interval of 5 seconds and do not stop even if errors occur in the test. Run repeatedly with an interval of 5 seconds and do not stop even if errors occur in the test.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon -interval 50000 -f false $ ${HBASE_HOME}/bin/hbase canary -daemon -interval 50000 -f false
---- ----
==== Force timeout if canary test stuck ==== Force timeout if canary test stuck
@ -192,7 +192,7 @@ Because of this we provide a timeout option to kill the canary test and return a
This run sets the timeout value to 60 seconds, the default value is 600 seconds. This run sets the timeout value to 60 seconds, the default value is 600 seconds.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000 $ ${HBASE_HOME}/bin/hbase canary -t 600000
---- ----
==== Enable write sniffing in canary ==== Enable write sniffing in canary
@ -203,12 +203,12 @@ When the write sniffing is enabled, the canary tool will create an hbase table a
regions of the table distributed on all region servers. In each sniffing period, the canary will regions of the table distributed on all region servers. In each sniffing period, the canary will
try to put data to these regions to check the write availability of each region server. try to put data to these regions to check the write availability of each region server.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing $ ${HBASE_HOME}/bin/hbase canary -writeSniffing
---- ----
The default write table is `hbase:canary` and can be specified by the option `-writeTable`. The default write table is `hbase:canary` and can be specified by the option `-writeTable`.
---- ----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing -writeTable ns:canary $ ${HBASE_HOME}/bin/hbase canary -writeSniffing -writeTable ns:canary
---- ----
The default value size of each put is 10 bytes and you can set it by the config key: The default value size of each put is 10 bytes and you can set it by the config key: