HBASE-4393 Implement a canary monitoring program
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1329574 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e031dc8d34
commit
1d6b501c9b
|
@ -0,0 +1,253 @@
|
|||
/**
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.tool;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.TableNotFoundException;
|
||||
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
|
||||
/**
|
||||
* HBase Canary Tool, that can be used to do
|
||||
* "canary monitoring" of a running HBase cluster.
|
||||
*
|
||||
* For each region, tries to get one row per column family
|
||||
* and outputs some information about failure or latency.
|
||||
*/
|
||||
public final class Canary implements Tool {
|
||||
// Sink interface used by the canary to outputs information
|
||||
public interface Sink {
|
||||
public void publishReadFailure(HRegionInfo region);
|
||||
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column);
|
||||
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
|
||||
}
|
||||
|
||||
// Simple implementation of canary sink that allows to plot on
|
||||
// file or standard output timings or failures.
|
||||
public static class StdOutSink implements Sink {
|
||||
@Override
|
||||
public void publishReadFailure(HRegionInfo region) {
|
||||
LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column) {
|
||||
LOG.error(String.format("read from region %s column family %s failed",
|
||||
region.getRegionNameAsString(), column.getNameAsString()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
|
||||
LOG.info(String.format("read from region %s column family %s in %dms",
|
||||
region.getRegionNameAsString(), column.getNameAsString(), msTime));
|
||||
}
|
||||
}
|
||||
|
||||
private static final long DEFAULT_INTERVAL = 6000;
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(Canary.class);
|
||||
|
||||
private Configuration conf = null;
|
||||
private HBaseAdmin admin = null;
|
||||
private long interval = 0;
|
||||
private Sink sink = null;
|
||||
|
||||
/**
 * Creates a canary that reports through a {@link StdOutSink}.
 */
public Canary() {
  this(new StdOutSink());
}

/**
 * Creates a canary that reports to the given sink.
 *
 * @param sink receiver of read timings and failures
 */
public Canary(Sink sink) {
  this.sink = sink;
}
|
||||
|
||||
@Override
|
||||
public Configuration getConf() {
|
||||
return conf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
this.conf = conf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int run(String[] args) throws Exception {
|
||||
int tables_index = -1;
|
||||
|
||||
// Process command line args
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String cmd = args[i];
|
||||
|
||||
if (cmd.startsWith("-")) {
|
||||
if (tables_index >= 0) {
|
||||
// command line args must be in the form: [opts] [table 1 [table 2 ...]]
|
||||
System.err.println("Invalid command line options");
|
||||
printUsageAndExit();
|
||||
}
|
||||
|
||||
if (cmd.equals("-help")) {
|
||||
// user asked for help, print the help and quit.
|
||||
printUsageAndExit();
|
||||
} else if (cmd.equals("-daemon") && interval == 0) {
|
||||
// user asked for daemon mode, set a default interval between checks
|
||||
interval = DEFAULT_INTERVAL;
|
||||
} else if (cmd.equals("-interval")) {
|
||||
// user has specified an interval for canary breaths (-interval N)
|
||||
i++;
|
||||
|
||||
if (i == args.length) {
|
||||
System.err.println("-interval needs a numeric value argument.");
|
||||
printUsageAndExit();
|
||||
}
|
||||
|
||||
try {
|
||||
interval = Long.parseLong(args[i]) * 1000;
|
||||
} catch (NumberFormatException e) {
|
||||
System.err.println("-interval needs a numeric value argument.");
|
||||
printUsageAndExit();
|
||||
}
|
||||
} else {
|
||||
// no options match
|
||||
System.err.println(cmd + " options is invalid.");
|
||||
printUsageAndExit();
|
||||
}
|
||||
} else if (tables_index < 0) {
|
||||
// keep track of first table name specified by the user
|
||||
tables_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize HBase conf and admin
|
||||
if (conf == null) conf = HBaseConfiguration.create();
|
||||
admin = new HBaseAdmin(conf);
|
||||
|
||||
// lets the canary monitor the cluster
|
||||
do {
|
||||
if (admin.isAborted()) {
|
||||
LOG.error("HBaseAdmin aborted");
|
||||
return(1);
|
||||
}
|
||||
|
||||
if (tables_index >= 0) {
|
||||
for (int i = tables_index; i < args.length; i++) {
|
||||
sniff(args[i]);
|
||||
}
|
||||
} else {
|
||||
sniff();
|
||||
}
|
||||
|
||||
Thread.sleep(interval);
|
||||
} while (interval > 0);
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
private void printUsageAndExit() {
|
||||
System.err.printf("Usage: bin/hbase %s [opts] [table 1 [table 2...]]\n", getClass().getName());
|
||||
System.err.println(" where [opts] are:");
|
||||
System.err.println(" -help Show this help and exit.");
|
||||
System.err.println(" -daemon Continuous check at defined intervals.");
|
||||
System.err.println(" -interval <N> Interval between checks (sec)");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* canary entry point to monitor all the tables.
|
||||
*/
|
||||
private void sniff() throws Exception {
|
||||
for (HTableDescriptor table : admin.listTables()) {
|
||||
sniff(table);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* canary entry point to monitor specified table.
|
||||
*/
|
||||
private void sniff(String tableName) throws Exception {
|
||||
if (admin.isTableAvailable(tableName)) {
|
||||
sniff(admin.getTableDescriptor(tableName.getBytes()));
|
||||
} else {
|
||||
LOG.warn(String.format("Table %s is not available", tableName));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Loops over regions that owns this table,
|
||||
* and output some information abouts the state.
|
||||
*/
|
||||
private void sniff(HTableDescriptor tableDesc) throws Exception {
|
||||
HTable table = null;
|
||||
|
||||
try {
|
||||
table = new HTable(admin.getConfiguration(), tableDesc.getName());
|
||||
} catch (TableNotFoundException e) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
|
||||
try {
|
||||
sniffRegion(region, table);
|
||||
} catch (Exception e) {
|
||||
sink.publishReadFailure(region);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For each column family of the region tries to get one row
|
||||
* and outputs the latency, or the failure.
|
||||
*/
|
||||
private void sniffRegion(HRegionInfo region, HTable table) throws Exception {
|
||||
HTableDescriptor tableDesc = table.getTableDescriptor();
|
||||
for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
|
||||
Get get = new Get(region.getStartKey());
|
||||
get.addFamily(column.getName());
|
||||
|
||||
try {
|
||||
long startTime = System.currentTimeMillis();
|
||||
table.get(get);
|
||||
long time = System.currentTimeMillis() - startTime;
|
||||
|
||||
sink.publishReadTiming(region, column, time);
|
||||
} catch (Exception e) {
|
||||
sink.publishReadFailure(region, column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
int exitCode = ToolRunner.run(new Canary(), args);
|
||||
System.exit(exitCode);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue