HBASE-13996 Add write sniffing in canary (Liu Shaohui)

Conflicts:
	hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java

Amending-Author: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Andrew Purtell 2015-08-21 22:44:56 -07:00
parent f05770b96f
commit 234a4632a4
4 changed files with 285 additions and 30 deletions

View File

@ -805,7 +805,8 @@ public final class HConstants {
/** /**
* timeout for short operation RPC * timeout for short operation RPC
*/ */
public static final String HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY = "hbase.rpc.shortoperation.timeout"; public static final String HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY =
"hbase.rpc.shortoperation.timeout";
/** /**
* Default value of {@link #HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY} * Default value of {@link #HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY}
@ -860,8 +861,8 @@ public final class HConstants {
*/ */
public static final float HBASE_CLUSTER_MINIMUM_MEMORY_THRESHOLD = 0.2f; public static final float HBASE_CLUSTER_MINIMUM_MEMORY_THRESHOLD = 0.2f;
public static final Pattern CP_HTD_ATTR_KEY_PATTERN = Pattern.compile public static final Pattern CP_HTD_ATTR_KEY_PATTERN =
("^coprocessor\\$([0-9]+)$", Pattern.CASE_INSENSITIVE); Pattern.compile("^coprocessor\\$([0-9]+)$", Pattern.CASE_INSENSITIVE);
public static final Pattern CP_HTD_ATTR_VALUE_PATTERN = public static final Pattern CP_HTD_ATTR_VALUE_PATTERN =
Pattern.compile("(^[^\\|]*)\\|([^\\|]+)\\|[\\s]*([\\d]*)[\\s]*(\\|.*)?$"); Pattern.compile("(^[^\\|]*)\\|([^\\|]+)\\|[\\s]*([\\d]*)[\\s]*(\\|.*)?$");
@ -972,7 +973,8 @@ public final class HConstants {
public static final String DEFAULT_WAL_STORAGE_POLICY = "NONE"; public static final String DEFAULT_WAL_STORAGE_POLICY = "NONE";
/** Region in Transition metrics threshold time */ /** Region in Transition metrics threshold time */
public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD="hbase.metrics.rit.stuck.warning.threshold"; public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =
"hbase.metrics.rit.stuck.warning.threshold";
public static final String LOAD_BALANCER_SLOP_KEY = "hbase.regions.slop"; public static final String LOAD_BALANCER_SLOP_KEY = "hbase.regions.slop";
@ -1068,7 +1070,8 @@ public final class HConstants {
* 0.0.0.0. * 0.0.0.0.
* @see <a href="https://issues.apache.org/jira/browse/HBASE-9961">HBASE-9961</a> * @see <a href="https://issues.apache.org/jira/browse/HBASE-9961">HBASE-9961</a>
*/ */
public static final String STATUS_MULTICAST_BIND_ADDRESS = "hbase.status.multicast.bind.address.ip"; public static final String STATUS_MULTICAST_BIND_ADDRESS =
"hbase.status.multicast.bind.address.ip";
public static final String DEFAULT_STATUS_MULTICAST_BIND_ADDRESS = "0.0.0.0"; public static final String DEFAULT_STATUS_MULTICAST_BIND_ADDRESS = "0.0.0.0";
/** /**
@ -1197,6 +1200,20 @@ public final class HConstants {
public static final String REGION_SPLIT_THREADS_MAX = public static final String REGION_SPLIT_THREADS_MAX =
"hbase.regionserver.region.split.threads.max"; "hbase.regionserver.region.split.threads.max";
/** Canary config keys */
public static final String HBASE_CANARY_WRITE_DATA_TTL_KEY = "hbase.canary.write.data.ttl";
public static final String HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY =
"hbase.canary.write.perserver.regions.lowerLimit";
public static final String HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY =
"hbase.canary.write.perserver.regions.upperLimit";
public static final String HBASE_CANARY_WRITE_VALUE_SIZE_KEY = "hbase.canary.write.value.size";
public static final String HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY =
"hbase.canary.write.table.check.period";
private HConstants() { private HConstants() {
// Can't be instantiated with this ctor. // Can't be instantiated with this ctor.
} }

View File

@ -48,6 +48,7 @@ org.apache.hadoop.hbase.master.RegionState;
org.apache.hadoop.hbase.HTableDescriptor; org.apache.hadoop.hbase.HTableDescriptor;
org.apache.hadoop.hbase.HBaseConfiguration; org.apache.hadoop.hbase.HBaseConfiguration;
org.apache.hadoop.hbase.TableName; org.apache.hadoop.hbase.TableName;
org.apache.hadoop.hbase.tool.Canary;
org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
org.apache.hadoop.hbase.master.DeadServer; org.apache.hadoop.hbase.master.DeadServer;
org.apache.hadoop.hbase.protobuf.ProtobufUtil; org.apache.hadoop.hbase.protobuf.ProtobufUtil;
@ -366,11 +367,14 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
</%if> </%if>
<%java>String description = null; <%java>String description = null;
if (tableName.equals(TableName.META_TABLE_NAME)){ if (tableName.equals(TableName.META_TABLE_NAME)){
description = "The hbase:meta table holds references to all User Table regions"; description = "The hbase:meta table holds references to all User Table regions.";
} else if (tableName.equals(Canary.DEFAULT_WRITE_TABLE_NAME)){
description = "The hbase:canary table is used to sniff the write availability of"
+ " each regionserver.";
} else if (tableName.equals(AccessControlLists.ACL_TABLE_NAME)){ } else if (tableName.equals(AccessControlLists.ACL_TABLE_NAME)){
description = "The hbase:acl table holds information about acl"; description = "The hbase:acl table holds information about acl";
} else if (tableName.equals(VisibilityConstants.LABELS_TABLE_NAME)){ } else if (tableName.equals(VisibilityConstants.LABELS_TABLE_NAME)){
description = "The hbase:labels table holds information about visibility labels"; description = "The hbase:labels table holds information about visibility labels.";
} else if (tableName.equals(TableName.NAMESPACE_TABLE_NAME)){ } else if (tableName.equals(TableName.NAMESPACE_TABLE_NAME)){
description = "The hbase:namespace table holds information about namespaces."; description = "The hbase:namespace table holds information about namespaces.";
} else if (tableName.equals(QuotaUtil.QUOTA_TABLE_NAME)){ } else if (tableName.equals(QuotaUtil.QUOTA_TABLE_NAME)){

View File

@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -47,9 +48,11 @@ import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
@ -59,13 +62,17 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.NeedUnmanagedConnectionException; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.tool.Canary.RegionTask.TaskType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.util.ToolRunner;
@ -86,6 +93,9 @@ public final class Canary implements Tool {
public void publishReadFailure(HRegionInfo region, Exception e); public void publishReadFailure(HRegionInfo region, Exception e);
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
public void publishWriteFailure(HRegionInfo region, Exception e);
public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
} }
// new extended sink for output regionserver mode info // new extended sink for output regionserver mode info
// do not change the Sink interface directly due to maintaining the API // do not change the Sink interface directly due to maintaining the API
@ -113,6 +123,23 @@ public final class Canary implements Tool {
LOG.info(String.format("read from region %s column family %s in %dms", LOG.info(String.format("read from region %s column family %s in %dms",
region.getRegionNameAsString(), column.getNameAsString(), msTime)); region.getRegionNameAsString(), column.getNameAsString(), msTime));
} }
@Override
public void publishWriteFailure(HRegionInfo region, Exception e) {
LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e);
}
@Override
public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
LOG.error(String.format("write to region %s column family %s failed",
region.getRegionNameAsString(), column.getNameAsString()), e);
}
@Override
public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
LOG.info(String.format("write to region %s column family %s in %dms",
region.getRegionNameAsString(), column.getNameAsString(), msTime));
}
} }
// a ExtendedSink implementation // a ExtendedSink implementation
public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink { public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
@ -134,18 +161,34 @@ public final class Canary implements Tool {
* failure. * failure.
*/ */
static class RegionTask implements Callable<Void> { static class RegionTask implements Callable<Void> {
public enum TaskType{
READ, WRITE
}
private Connection connection; private Connection connection;
private HRegionInfo region; private HRegionInfo region;
private Sink sink; private Sink sink;
private TaskType taskType;
RegionTask(Connection connection, HRegionInfo region, Sink sink) { RegionTask(Connection connection, HRegionInfo region, Sink sink, TaskType taskType) {
this.connection = connection; this.connection = connection;
this.region = region; this.region = region;
this.sink = sink; this.sink = sink;
this.taskType = taskType;
} }
@Override @Override
public Void call() { public Void call() {
switch (taskType) {
case READ:
return read();
case WRITE:
return write();
default:
return read();
}
}
public Void read() {
Table table = null; Table table = null;
HTableDescriptor tableDesc = null; HTableDescriptor tableDesc = null;
try { try {
@ -158,6 +201,7 @@ public final class Canary implements Tool {
try { try {
table.close(); table.close();
} catch (IOException ioe) { } catch (IOException ioe) {
LOG.error("Close table failed", e);
} }
} }
return null; return null;
@ -212,6 +256,44 @@ public final class Canary implements Tool {
try { try {
table.close(); table.close();
} catch (IOException e) { } catch (IOException e) {
LOG.error("Close table failed", e);
}
return null;
}
/**
* Check writes for the canary table
* @return
*/
private Void write() {
Table table = null;
HTableDescriptor tableDesc = null;
try {
table = connection.getTable(region.getTable());
tableDesc = table.getTableDescriptor();
byte[] rowToCheck = region.getStartKey();
if (rowToCheck.length == 0) {
rowToCheck = new byte[]{0x0};
}
int writeValueSize =
connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10);
for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
Put put = new Put(rowToCheck);
byte[] value = new byte[writeValueSize];
Bytes.random(value);
put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
try {
long startTime = System.currentTimeMillis();
table.put(put);
long time = System.currentTimeMillis() - startTime;
sink.publishWriteTiming(region, column, time);
} catch (Exception e) {
sink.publishWriteFailure(region, column, e);
}
}
table.close();
} catch (IOException e) {
sink.publishWriteFailure(region, e);
} }
return null; return null;
} }
@ -269,11 +351,12 @@ public final class Canary implements Tool {
} }
sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime()); sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime());
} catch (TableNotFoundException tnfe) { } catch (TableNotFoundException tnfe) {
LOG.error("Table may be deleted", tnfe);
// This is ignored because it doesn't imply that the regionserver is dead // This is ignored because it doesn't imply that the regionserver is dead
} catch (TableNotEnabledException tnee) { } catch (TableNotEnabledException tnee) {
// This is considered a success since we got a response. // This is considered a success since we got a response.
LOG.debug("The targeted table was disabled. Assuming success."); LOG.debug("The targeted table was disabled. Assuming success.");
} catch (DoNotRetryIOException | NeedUnmanagedConnectionException dnrioe) { } catch (DoNotRetryIOException dnrioe) {
sink.publishReadFailure(tableName.getNameAsString(), serverName); sink.publishReadFailure(tableName.getNameAsString(), serverName);
LOG.error(dnrioe); LOG.error(dnrioe);
} catch (IOException e) { } catch (IOException e) {
@ -284,6 +367,7 @@ public final class Canary implements Tool {
try { try {
table.close(); table.close();
} catch (IOException e) {/* DO NOTHING */ } catch (IOException e) {/* DO NOTHING */
LOG.error("Close table failed", e);
} }
} }
scan = null; scan = null;
@ -302,11 +386,15 @@ public final class Canary implements Tool {
private static final long DEFAULT_INTERVAL = 6000; private static final long DEFAULT_INTERVAL = 6000;
private static final long DEFAULT_TIMEOUT = 600000; // 10 mins private static final long DEFAULT_TIMEOUT = 600000; // 10 mins
private static final int MAX_THREADS_NUM = 16; // #threads to contact regions private static final int MAX_THREADS_NUM = 16; // #threads to contact regions
private static final Log LOG = LogFactory.getLog(Canary.class); private static final Log LOG = LogFactory.getLog(Canary.class);
public static final TableName DEFAULT_WRITE_TABLE_NAME = TableName.valueOf(
NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR, "canary");
private static final String CANARY_TABLE_FAMILY_NAME = "Test";
private Configuration conf = null; private Configuration conf = null;
private long interval = 0; private long interval = 0;
private Sink sink = null; private Sink sink = null;
@ -315,6 +403,9 @@ public final class Canary implements Tool {
private long timeout = DEFAULT_TIMEOUT; private long timeout = DEFAULT_TIMEOUT;
private boolean failOnError = true; private boolean failOnError = true;
private boolean regionServerMode = false; private boolean regionServerMode = false;
private boolean writeSniffing = false;
private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME;
private ExecutorService executor; // threads to retrieve data from regionservers private ExecutorService executor; // threads to retrieve data from regionservers
public Canary() { public Canary() {
@ -336,11 +427,8 @@ public final class Canary implements Tool {
this.conf = conf; this.conf = conf;
} }
@Override private int parseArgs(String[] args) {
public int run(String[] args) throws Exception {
int index = -1; int index = -1;
ChoreService choreService = null;
// Process command line args // Process command line args
for (int i = 0; i < args.length; i++) { for (int i = 0; i < args.length; i++) {
String cmd = args[i]; String cmd = args[i];
@ -375,6 +463,8 @@ public final class Canary implements Tool {
} }
} else if(cmd.equals("-regionserver")) { } else if(cmd.equals("-regionserver")) {
this.regionServerMode = true; this.regionServerMode = true;
} else if(cmd.equals("-writeSniffing")) {
this.writeSniffing = true;
} else if (cmd.equals("-e")) { } else if (cmd.equals("-e")) {
this.useRegExp = true; this.useRegExp = true;
} else if (cmd.equals("-t")) { } else if (cmd.equals("-t")) {
@ -391,7 +481,14 @@ public final class Canary implements Tool {
System.err.println("-t needs a numeric value argument."); System.err.println("-t needs a numeric value argument.");
printUsageAndExit(); printUsageAndExit();
} }
} else if (cmd.equals("-writeTable")) {
i++;
if (i == args.length) {
System.err.println("-writeTable needs a string value argument.");
printUsageAndExit();
}
this.writeTableName = TableName.valueOf(args[i]);
} else if (cmd.equals("-f")) { } else if (cmd.equals("-f")) {
i++; i++;
@ -412,6 +509,13 @@ public final class Canary implements Tool {
index = i; index = i;
} }
} }
return index;
}
@Override
public int run(String[] args) throws Exception {
int index = parseArgs(args);
ChoreService choreService = null;
// Launches chore for refreshing kerberos credentials if security is enabled. // Launches chore for refreshing kerberos credentials if security is enabled.
// Please see http://hbase.apache.org/book.html#_running_canary_in_a_kerberos_enabled_cluster // Please see http://hbase.apache.org/book.html#_running_canary_in_a_kerberos_enabled_cluster
@ -497,6 +601,9 @@ public final class Canary implements Tool {
System.err.println(" -f <B> stop whole program if first error occurs," + System.err.println(" -f <B> stop whole program if first error occurs," +
" default is true"); " default is true");
System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)"); System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
System.err.println(" -writeSniffing enable the write sniffing in canary");
System.err.println(" -writeTable The table used for write sniffing."
+ " Default is hbase:canary");
System.exit(USAGE_EXIT_CODE); System.exit(USAGE_EXIT_CODE);
} }
@ -523,7 +630,8 @@ public final class Canary implements Tool {
(ExtendedSink) this.sink, this.executor); (ExtendedSink) this.sink, this.executor);
} else { } else {
monitor = monitor =
new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor); new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor,
this.writeSniffing, this.writeTableName);
} }
return monitor; return monitor;
} }
@ -586,10 +694,34 @@ public final class Canary implements Tool {
// a monitor for region mode // a monitor for region mode
private static class RegionMonitor extends Monitor { private static class RegionMonitor extends Monitor {
// 10 minutes
private static final int DEFAULT_WRITE_TABLE_CHECK_PERIOD = 10 * 60 * 1000;
// 1 days
private static final int DEFAULT_WRITE_DATA_TTL = 24 * 60 * 60;
private long lastCheckTime = -1;
private boolean writeSniffing;
private TableName writeTableName;
private int writeDataTTL;
private float regionsLowerLimit;
private float regionsUpperLimit;
private int checkPeriod;
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
Sink sink, ExecutorService executor) { Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName) {
super(connection, monitorTargets, useRegExp, sink, executor); super(connection, monitorTargets, useRegExp, sink, executor);
Configuration conf = connection.getConfiguration();
this.writeSniffing = writeSniffing;
this.writeTableName = writeTableName;
this.writeDataTTL =
conf.getInt(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY, DEFAULT_WRITE_DATA_TTL);
this.regionsLowerLimit =
conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY, 1.0f);
this.regionsUpperLimit =
conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY, 1.5f);
this.checkPeriod =
conf.getInt(HConstants.HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY,
DEFAULT_WRITE_TABLE_CHECK_PERIOD);
} }
@Override @Override
@ -601,11 +733,26 @@ public final class Canary implements Tool {
String[] tables = generateMonitorTables(this.targets); String[] tables = generateMonitorTables(this.targets);
this.initialized = true; this.initialized = true;
for (String table : tables) { for (String table : tables) {
taskFutures.addAll(Canary.sniff(admin, sink, table, executor)); taskFutures.addAll(Canary.sniff(admin, sink, table, executor, TaskType.READ));
} }
} else { } else {
taskFutures.addAll(sniff()); taskFutures.addAll(sniff(TaskType.READ));
} }
if (writeSniffing) {
if (EnvironmentEdgeManager.currentTime() - lastCheckTime > checkPeriod) {
try {
checkWriteTableDistribution();
} catch (IOException e) {
LOG.error("Check canary table distribution failed!", e);
}
lastCheckTime = EnvironmentEdgeManager.currentTime();
}
// sniff canary table with write operation
taskFutures.addAll(Canary.sniff(admin, sink,
admin.getTableDescriptor(writeTableName), executor, TaskType.WRITE));
}
for (Future<Void> future : taskFutures) { for (Future<Void> future : taskFutures) {
try { try {
future.get(); future.get();
@ -661,25 +808,91 @@ public final class Canary implements Tool {
/* /*
* canary entry point to monitor all the tables. * canary entry point to monitor all the tables.
*/ */
private List<Future<Void>> sniff() throws Exception { private List<Future<Void>> sniff(TaskType taskType) throws Exception {
List<Future<Void>> taskFutures = new LinkedList<Future<Void>>(); List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
for (HTableDescriptor table : admin.listTables()) { for (HTableDescriptor table : admin.listTables()) {
if (admin.isTableEnabled(table.getTableName())) { if (admin.isTableEnabled(table.getTableName())
taskFutures.addAll(Canary.sniff(admin, sink, table, executor)); && (!table.getTableName().equals(writeTableName))) {
taskFutures.addAll(Canary.sniff(admin, sink, table, executor, taskType));
} }
} }
return taskFutures; return taskFutures;
} }
private void checkWriteTableDistribution() throws IOException {
if (!admin.tableExists(writeTableName)) {
int numberOfServers = admin.getClusterStatus().getServers().size();
if (numberOfServers == 0) {
throw new IllegalStateException("No live regionservers");
}
createWriteTable(numberOfServers);
}
if (!admin.isTableEnabled(writeTableName)) {
admin.enableTable(writeTableName);
}
int numberOfServers = admin.getClusterStatus().getServers().size();
List<HRegionLocation> locations;
RegionLocator locator = connection.getRegionLocator(writeTableName);
try {
locations = locator.getAllRegionLocations();
} finally {
locator.close();
}
int numberOfRegions = locations.size();
if (numberOfRegions < numberOfServers * regionsLowerLimit
|| numberOfRegions > numberOfServers * regionsUpperLimit) {
admin.disableTable(writeTableName);
admin.deleteTable(writeTableName);
createWriteTable(numberOfServers);
}
HashSet<ServerName> serverSet = new HashSet<ServerName>();
for (HRegionLocation location: locations) {
serverSet.add(location.getServerName());
}
int numberOfCoveredServers = serverSet.size();
if (numberOfCoveredServers < numberOfServers) {
admin.balancer();
}
}
private void createWriteTable(int numberOfServers) throws IOException {
int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
LOG.info("Number of live regionservers: " + numberOfServers + ", "
+ "pre-splitting the canary table into " + numberOfRegions + " regions "
+ "(current lower limit of regions per server is " + regionsLowerLimit
+ " and you can change it by config: "
+ HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
HTableDescriptor desc = new HTableDescriptor(writeTableName);
HColumnDescriptor family = new HColumnDescriptor(CANARY_TABLE_FAMILY_NAME);
family.setMaxVersions(1);
family.setTimeToLive(writeDataTTL);
desc.addFamily(family);
byte[][] splits = new RegionSplitter.HexStringSplit().split(numberOfRegions);
admin.createTable(desc, splits);
}
} }
/** /**
* Canary entry point for specified table. * Canary entry point for specified table.
* @throws Exception * @throws Exception
*/ */
public static void sniff(final Admin admin, TableName tableName) throws Exception { public static void sniff(final Admin admin, TableName tableName)
throws Exception {
sniff(admin, tableName, TaskType.READ);
}
/**
* Canary entry point for specified table with task type(read/write)
* @throws Exception
*/
public static void sniff(final Admin admin, TableName tableName, TaskType taskType)
throws Exception {
List<Future<Void>> taskFutures = List<Future<Void>> taskFutures =
Canary.sniff(admin, new StdOutSink(), tableName.getNameAsString(), Canary.sniff(admin, new StdOutSink(), tableName.getNameAsString(),
new ScheduledThreadPoolExecutor(1)); new ScheduledThreadPoolExecutor(1), taskType);
for (Future<Void> future : taskFutures) { for (Future<Void> future : taskFutures) {
future.get(); future.get();
} }
@ -690,10 +903,10 @@ public final class Canary implements Tool {
* @throws Exception * @throws Exception
*/ */
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, String tableName, private static List<Future<Void>> sniff(final Admin admin, final Sink sink, String tableName,
ExecutorService executor) throws Exception { ExecutorService executor, TaskType taskType) throws Exception {
if (admin.isTableEnabled(TableName.valueOf(tableName))) { if (admin.isTableEnabled(TableName.valueOf(tableName))) {
return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)), return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)),
executor); executor, taskType);
} else { } else {
LOG.warn(String.format("Table %s is not enabled", tableName)); LOG.warn(String.format("Table %s is not enabled", tableName));
} }
@ -704,7 +917,7 @@ public final class Canary implements Tool {
* Loops over regions that owns this table, and output some information abouts the state. * Loops over regions that owns this table, and output some information abouts the state.
*/ */
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
HTableDescriptor tableDesc, ExecutorService executor) throws Exception { HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType) throws Exception {
Table table = null; Table table = null;
try { try {
table = admin.getConnection().getTable(tableDesc.getTableName()); table = admin.getConnection().getTable(tableDesc.getTableName());
@ -714,7 +927,7 @@ public final class Canary implements Tool {
List<RegionTask> tasks = new ArrayList<RegionTask>(); List<RegionTask> tasks = new ArrayList<RegionTask>();
try { try {
for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) { for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
tasks.add(new RegionTask(admin.getConnection(), region, sink)); tasks.add(new RegionTask(admin.getConnection(), region, sink, taskType));
} }
} finally { } finally {
table.close(); table.close();

View File

@ -92,6 +92,8 @@ Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...]
which means the region/regionserver is regular expression pattern which means the region/regionserver is regular expression pattern
-f <B> stop whole program if first error occurs, default is true -f <B> stop whole program if first error occurs, default is true
-t <N> timeout for a check, default is 600000 (milliseconds) -t <N> timeout for a check, default is 600000 (milliseconds)
-writeSniffing enable the write sniffing in canary
-writeTable The table used for write sniffing. Default is hbase:canary
---- ----
This tool will return non zero error codes to user for collaborating with other monitoring tools, such as Nagios. This tool will return non zero error codes to user for collaborating with other monitoring tools, such as Nagios.
@ -193,6 +195,25 @@ This run sets the timeout value to 60 seconds, the default value is 600 seconds.
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000 $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000
---- ----
==== Enable write sniffing in canary
By default, the canary tool only checks the read operations, so it is hard to find problems in the
write path. To enable the write sniffing, you can run canary with the `-writeSniffing` option.
When write sniffing is enabled, the canary tool will create an HBase table and make sure the
regions of the table are distributed across all region servers. In each sniffing period, the canary will
try to put data to these regions to check the write availability of each region server.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing
----
The default write table is `hbase:canary` and can be specified by the option `-writeTable`.
----
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing -writeTable ns:canary
----
The default size of the value written by each put is 10 bytes, and you can change it with the config key:
`hbase.canary.write.value.size`.
==== Running Canary in a Kerberos-enabled Cluster ==== Running Canary in a Kerberos-enabled Cluster
To run Canary in a Kerberos-enabled cluster, configure the following two properties in _hbase-site.xml_: To run Canary in a Kerberos-enabled cluster, configure the following two properties in _hbase-site.xml_: