diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 5fafda004fa..067a4dd6be1 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -805,7 +805,8 @@ public final class HConstants {
/**
* timeout for short operation RPC
*/
- public static final String HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY = "hbase.rpc.shortoperation.timeout";
+ public static final String HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY =
+ "hbase.rpc.shortoperation.timeout";
/**
* Default value of {@link #HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY}
@@ -860,8 +861,8 @@ public final class HConstants {
*/
public static final float HBASE_CLUSTER_MINIMUM_MEMORY_THRESHOLD = 0.2f;
- public static final Pattern CP_HTD_ATTR_KEY_PATTERN = Pattern.compile
- ("^coprocessor\\$([0-9]+)$", Pattern.CASE_INSENSITIVE);
+ public static final Pattern CP_HTD_ATTR_KEY_PATTERN =
+ Pattern.compile("^coprocessor\\$([0-9]+)$", Pattern.CASE_INSENSITIVE);
public static final Pattern CP_HTD_ATTR_VALUE_PATTERN =
Pattern.compile("(^[^\\|]*)\\|([^\\|]+)\\|[\\s]*([\\d]*)[\\s]*(\\|.*)?$");
@@ -912,7 +913,7 @@ public final class HConstants {
* 1 => Abort only all of the handers have died
*/
public static final String REGION_SERVER_HANDLER_ABORT_ON_ERROR_PERCENT =
- "hbase.regionserver.handler.abort.on.error.percent";
+ "hbase.regionserver.handler.abort.on.error.percent";
public static final double DEFAULT_REGION_SERVER_HANDLER_ABORT_ON_ERROR_PERCENT = 0.5;
//High priority handlers to deal with admin requests and system table operation requests
@@ -972,7 +973,8 @@ public final class HConstants {
public static final String DEFAULT_WAL_STORAGE_POLICY = "NONE";
/** Region in Transition metrics threshold time */
- public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD="hbase.metrics.rit.stuck.warning.threshold";
+ public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =
+ "hbase.metrics.rit.stuck.warning.threshold";
public static final String LOAD_BALANCER_SLOP_KEY = "hbase.regions.slop";
@@ -1068,7 +1070,8 @@ public final class HConstants {
* 0.0.0.0.
* @see HBASE-9961
*/
- public static final String STATUS_MULTICAST_BIND_ADDRESS = "hbase.status.multicast.bind.address.ip";
+ public static final String STATUS_MULTICAST_BIND_ADDRESS =
+ "hbase.status.multicast.bind.address.ip";
public static final String DEFAULT_STATUS_MULTICAST_BIND_ADDRESS = "0.0.0.0";
/**
@@ -1197,6 +1200,20 @@ public final class HConstants {
public static final String REGION_SPLIT_THREADS_MAX =
"hbase.regionserver.region.split.threads.max";
+ /** Canary config keys */
+ public static final String HBASE_CANARY_WRITE_DATA_TTL_KEY = "hbase.canary.write.data.ttl";
+
+ public static final String HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY =
+ "hbase.canary.write.perserver.regions.lowerLimit";
+
+ public static final String HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY =
+ "hbase.canary.write.perserver.regions.upperLimit";
+
+ public static final String HBASE_CANARY_WRITE_VALUE_SIZE_KEY = "hbase.canary.write.value.size";
+
+ public static final String HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY =
+ "hbase.canary.write.table.check.period";
+
private HConstants() {
// Can't be instantiated with this ctor.
}
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
index caa3535896f..c0b70a2c38f 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
@@ -48,6 +48,7 @@ org.apache.hadoop.hbase.master.RegionState;
org.apache.hadoop.hbase.HTableDescriptor;
org.apache.hadoop.hbase.HBaseConfiguration;
org.apache.hadoop.hbase.TableName;
+org.apache.hadoop.hbase.tool.Canary;
org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
org.apache.hadoop.hbase.master.DeadServer;
org.apache.hadoop.hbase.protobuf.ProtobufUtil;
@@ -366,11 +367,14 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
%if>
<%java>String description = null;
if (tableName.equals(TableName.META_TABLE_NAME)){
- description = "The hbase:meta table holds references to all User Table regions";
+ description = "The hbase:meta table holds references to all User Table regions.";
+ } else if (tableName.equals(Canary.DEFAULT_WRITE_TABLE_NAME)){
+ description = "The hbase:canary table is used to sniff the write availability of"
+ + " each regionserver.";
} else if (tableName.equals(AccessControlLists.ACL_TABLE_NAME)){
description = "The hbase:acl table holds information about acl";
- } else if (tableName.equals(VisibilityConstants.LABELS_TABLE_NAME)){
- description = "The hbase:labels table holds information about visibility labels";
+ } else if (tableName.equals(VisibilityConstants.LABELS_TABLE_NAME)){
+ description = "The hbase:labels table holds information about visibility labels.";
} else if (tableName.equals(TableName.NAMESPACE_TABLE_NAME)){
description = "The hbase:namespace table holds information about namespaces.";
} else if (tableName.equals(QuotaUtil.QUOTA_TABLE_NAME)){
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
index 2cf34b386fb..7f5c6842a6d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -47,9 +48,11 @@ import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
@@ -59,13 +62,17 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.NeedUnmanagedConnectionException;
+import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.tool.Canary.RegionTask.TaskType;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ReflectionUtils;
+import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@@ -86,6 +93,9 @@ public final class Canary implements Tool {
public void publishReadFailure(HRegionInfo region, Exception e);
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
+ public void publishWriteFailure(HRegionInfo region, Exception e);
+ public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
+ public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
}
// new extended sink for output regionserver mode info
// do not change the Sink interface directly due to maintaining the API
@@ -113,6 +123,23 @@ public final class Canary implements Tool {
LOG.info(String.format("read from region %s column family %s in %dms",
region.getRegionNameAsString(), column.getNameAsString(), msTime));
}
+
+ @Override
+ public void publishWriteFailure(HRegionInfo region, Exception e) {
+ LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e);
+ }
+
+ @Override
+ public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
+ LOG.error(String.format("write to region %s column family %s failed",
+ region.getRegionNameAsString(), column.getNameAsString()), e);
+ }
+
+ @Override
+ public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
+ LOG.info(String.format("write to region %s column family %s in %dms",
+ region.getRegionNameAsString(), column.getNameAsString(), msTime));
+ }
}
// a ExtendedSink implementation
public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
@@ -134,18 +161,34 @@ public final class Canary implements Tool {
* failure.
*/
static class RegionTask implements Callable {
+ public enum TaskType{
+ READ, WRITE
+ }
private Connection connection;
private HRegionInfo region;
private Sink sink;
+ private TaskType taskType;
- RegionTask(Connection connection, HRegionInfo region, Sink sink) {
+ RegionTask(Connection connection, HRegionInfo region, Sink sink, TaskType taskType) {
this.connection = connection;
this.region = region;
this.sink = sink;
+ this.taskType = taskType;
}
@Override
public Void call() {
+ switch (taskType) {
+ case READ:
+ return read();
+ case WRITE:
+ return write();
+ default:
+ return read();
+ }
+ }
+
+ public Void read() {
Table table = null;
HTableDescriptor tableDesc = null;
try {
@@ -158,6 +201,7 @@ public final class Canary implements Tool {
try {
table.close();
} catch (IOException ioe) {
+ LOG.error("Close table failed", ioe);
}
}
return null;
@@ -212,6 +256,44 @@ public final class Canary implements Tool {
try {
table.close();
} catch (IOException e) {
+ LOG.error("Close table failed", e);
+ }
+ return null;
+ }
+
+ /**
+ * Check writes for the canary table
+ * @return always null; write failures are reported through the sink
+ */
+ private Void write() {
+ Table table = null;
+ HTableDescriptor tableDesc = null;
+ try {
+ table = connection.getTable(region.getTable());
+ tableDesc = table.getTableDescriptor();
+ byte[] rowToCheck = region.getStartKey();
+ if (rowToCheck.length == 0) {
+ rowToCheck = new byte[]{0x0};
+ }
+ int writeValueSize =
+ connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10);
+ for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
+ Put put = new Put(rowToCheck);
+ byte[] value = new byte[writeValueSize];
+ Bytes.random(value);
+ put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
+ try {
+ long startTime = System.currentTimeMillis();
+ table.put(put);
+ long time = System.currentTimeMillis() - startTime;
+ sink.publishWriteTiming(region, column, time);
+ } catch (Exception e) {
+ sink.publishWriteFailure(region, column, e);
+ }
+ }
+ table.close();
+ } catch (IOException e) {
+ sink.publishWriteFailure(region, e);
}
return null;
}
@@ -269,11 +351,12 @@ public final class Canary implements Tool {
}
sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime());
} catch (TableNotFoundException tnfe) {
+ LOG.error("Table may be deleted", tnfe);
// This is ignored because it doesn't imply that the regionserver is dead
} catch (TableNotEnabledException tnee) {
// This is considered a success since we got a response.
LOG.debug("The targeted table was disabled. Assuming success.");
- } catch (DoNotRetryIOException | NeedUnmanagedConnectionException dnrioe) {
+ } catch (DoNotRetryIOException dnrioe) {
sink.publishReadFailure(tableName.getNameAsString(), serverName);
LOG.error(dnrioe);
} catch (IOException e) {
@@ -284,6 +367,7 @@ public final class Canary implements Tool {
try {
table.close();
} catch (IOException e) {/* DO NOTHING */
+ LOG.error("Close table failed", e);
}
}
scan = null;
@@ -302,11 +386,15 @@ public final class Canary implements Tool {
private static final long DEFAULT_INTERVAL = 6000;
private static final long DEFAULT_TIMEOUT = 600000; // 10 mins
-
private static final int MAX_THREADS_NUM = 16; // #threads to contact regions
private static final Log LOG = LogFactory.getLog(Canary.class);
+ public static final TableName DEFAULT_WRITE_TABLE_NAME = TableName.valueOf(
+ NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR, "canary");
+
+ private static final String CANARY_TABLE_FAMILY_NAME = "Test";
+
private Configuration conf = null;
private long interval = 0;
private Sink sink = null;
@@ -315,6 +403,9 @@ public final class Canary implements Tool {
private long timeout = DEFAULT_TIMEOUT;
private boolean failOnError = true;
private boolean regionServerMode = false;
+ private boolean writeSniffing = false;
+ private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME;
+
private ExecutorService executor; // threads to retrieve data from regionservers
public Canary() {
@@ -336,11 +427,8 @@ public final class Canary implements Tool {
this.conf = conf;
}
- @Override
- public int run(String[] args) throws Exception {
+ private int parseArgs(String[] args) {
int index = -1;
- ChoreService choreService = null;
-
// Process command line args
for (int i = 0; i < args.length; i++) {
String cmd = args[i];
@@ -375,6 +463,8 @@ public final class Canary implements Tool {
}
} else if(cmd.equals("-regionserver")) {
this.regionServerMode = true;
+ } else if(cmd.equals("-writeSniffing")) {
+ this.writeSniffing = true;
} else if (cmd.equals("-e")) {
this.useRegExp = true;
} else if (cmd.equals("-t")) {
@@ -391,7 +481,14 @@ public final class Canary implements Tool {
System.err.println("-t needs a numeric value argument.");
printUsageAndExit();
}
+ } else if (cmd.equals("-writeTable")) {
+ i++;
+ if (i == args.length) {
+ System.err.println("-writeTable needs a string value argument.");
+ printUsageAndExit();
+ }
+ this.writeTableName = TableName.valueOf(args[i]);
} else if (cmd.equals("-f")) {
i++;
@@ -412,6 +509,13 @@ public final class Canary implements Tool {
index = i;
}
}
+ return index;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ int index = parseArgs(args);
+ ChoreService choreService = null;
// Launches chore for refreshing kerberos credentials if security is enabled.
// Please see http://hbase.apache.org/book.html#_running_canary_in_a_kerberos_enabled_cluster
@@ -497,6 +601,9 @@ public final class Canary implements Tool {
System.err.println(" -f stop whole program if first error occurs," +
" default is true");
System.err.println(" -t timeout for a check, default is 600000 (milisecs)");
+ System.err.println(" -writeSniffing enable the write sniffing in canary");
+ System.err.println(" -writeTable The table used for write sniffing."
+ + " Default is hbase:canary");
System.exit(USAGE_EXIT_CODE);
}
@@ -523,7 +630,8 @@ public final class Canary implements Tool {
(ExtendedSink) this.sink, this.executor);
} else {
monitor =
- new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor);
+ new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor,
+ this.writeSniffing, this.writeTableName);
}
return monitor;
}
@@ -586,10 +694,34 @@ public final class Canary implements Tool {
// a monitor for region mode
private static class RegionMonitor extends Monitor {
+ // 10 minutes
+ private static final int DEFAULT_WRITE_TABLE_CHECK_PERIOD = 10 * 60 * 1000;
+ // 1 days
+ private static final int DEFAULT_WRITE_DATA_TTL = 24 * 60 * 60;
+
+ private long lastCheckTime = -1;
+ private boolean writeSniffing;
+ private TableName writeTableName;
+ private int writeDataTTL;
+ private float regionsLowerLimit;
+ private float regionsUpperLimit;
+ private int checkPeriod;
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
- Sink sink, ExecutorService executor) {
+ Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName) {
super(connection, monitorTargets, useRegExp, sink, executor);
+ Configuration conf = connection.getConfiguration();
+ this.writeSniffing = writeSniffing;
+ this.writeTableName = writeTableName;
+ this.writeDataTTL =
+ conf.getInt(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY, DEFAULT_WRITE_DATA_TTL);
+ this.regionsLowerLimit =
+ conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY, 1.0f);
+ this.regionsUpperLimit =
+ conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY, 1.5f);
+ this.checkPeriod =
+ conf.getInt(HConstants.HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY,
+ DEFAULT_WRITE_TABLE_CHECK_PERIOD);
}
@Override
@@ -601,11 +733,26 @@ public final class Canary implements Tool {
String[] tables = generateMonitorTables(this.targets);
this.initialized = true;
for (String table : tables) {
- taskFutures.addAll(Canary.sniff(admin, sink, table, executor));
+ taskFutures.addAll(Canary.sniff(admin, sink, table, executor, TaskType.READ));
}
} else {
- taskFutures.addAll(sniff());
+ taskFutures.addAll(sniff(TaskType.READ));
}
+
+ if (writeSniffing) {
+ if (EnvironmentEdgeManager.currentTime() - lastCheckTime > checkPeriod) {
+ try {
+ checkWriteTableDistribution();
+ } catch (IOException e) {
+ LOG.error("Check canary table distribution failed!", e);
+ }
+ lastCheckTime = EnvironmentEdgeManager.currentTime();
+ }
+ // sniff canary table with write operation
+ taskFutures.addAll(Canary.sniff(admin, sink,
+ admin.getTableDescriptor(writeTableName), executor, TaskType.WRITE));
+ }
+
for (Future future : taskFutures) {
try {
future.get();
@@ -661,25 +808,91 @@ public final class Canary implements Tool {
/*
* canary entry point to monitor all the tables.
*/
- private List> sniff() throws Exception {
+ private List> sniff(TaskType taskType) throws Exception {
List> taskFutures = new LinkedList>();
for (HTableDescriptor table : admin.listTables()) {
- if (admin.isTableEnabled(table.getTableName())) {
- taskFutures.addAll(Canary.sniff(admin, sink, table, executor));
+ if (admin.isTableEnabled(table.getTableName())
+ && (!table.getTableName().equals(writeTableName))) {
+ taskFutures.addAll(Canary.sniff(admin, sink, table, executor, taskType));
}
}
return taskFutures;
}
+
+ private void checkWriteTableDistribution() throws IOException {
+ if (!admin.tableExists(writeTableName)) {
+ int numberOfServers = admin.getClusterStatus().getServers().size();
+ if (numberOfServers == 0) {
+ throw new IllegalStateException("No live regionservers");
+ }
+ createWriteTable(numberOfServers);
+ }
+
+ if (!admin.isTableEnabled(writeTableName)) {
+ admin.enableTable(writeTableName);
+ }
+
+ int numberOfServers = admin.getClusterStatus().getServers().size();
+ List locations;
+ RegionLocator locator = connection.getRegionLocator(writeTableName);
+ try {
+ locations = locator.getAllRegionLocations();
+ } finally {
+ locator.close();
+ }
+ int numberOfRegions = locations.size();
+ if (numberOfRegions < numberOfServers * regionsLowerLimit
+ || numberOfRegions > numberOfServers * regionsUpperLimit) {
+ admin.disableTable(writeTableName);
+ admin.deleteTable(writeTableName);
+ createWriteTable(numberOfServers);
+ }
+ HashSet serverSet = new HashSet();
+ for (HRegionLocation location: locations) {
+ serverSet.add(location.getServerName());
+ }
+ int numberOfCoveredServers = serverSet.size();
+ if (numberOfCoveredServers < numberOfServers) {
+ admin.balancer();
+ }
+ }
+
+ private void createWriteTable(int numberOfServers) throws IOException {
+ int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
+ LOG.info("Number of live regionservers: " + numberOfServers + ", "
+ + "pre-splitting the canary table into " + numberOfRegions + " regions "
+ + "(current lower limit of regions per server is " + regionsLowerLimit
+ + " and you can change it by config: "
+ + HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
+ HTableDescriptor desc = new HTableDescriptor(writeTableName);
+ HColumnDescriptor family = new HColumnDescriptor(CANARY_TABLE_FAMILY_NAME);
+ family.setMaxVersions(1);
+ family.setTimeToLive(writeDataTTL);
+
+ desc.addFamily(family);
+ byte[][] splits = new RegionSplitter.HexStringSplit().split(numberOfRegions);
+ admin.createTable(desc, splits);
+ }
}
/**
* Canary entry point for specified table.
* @throws Exception
*/
- public static void sniff(final Admin admin, TableName tableName) throws Exception {
+ public static void sniff(final Admin admin, TableName tableName)
+ throws Exception {
+ sniff(admin, tableName, TaskType.READ);
+ }
+
+ /**
+ * Canary entry point for specified table with task type(read/write)
+ * @throws Exception
+ */
+ public static void sniff(final Admin admin, TableName tableName, TaskType taskType)
+ throws Exception {
List> taskFutures =
Canary.sniff(admin, new StdOutSink(), tableName.getNameAsString(),
- new ScheduledThreadPoolExecutor(1));
+ new ScheduledThreadPoolExecutor(1), taskType);
for (Future future : taskFutures) {
future.get();
}
@@ -690,10 +903,10 @@ public final class Canary implements Tool {
* @throws Exception
*/
private static List> sniff(final Admin admin, final Sink sink, String tableName,
- ExecutorService executor) throws Exception {
+ ExecutorService executor, TaskType taskType) throws Exception {
if (admin.isTableEnabled(TableName.valueOf(tableName))) {
return Canary.sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)),
- executor);
+ executor, taskType);
} else {
LOG.warn(String.format("Table %s is not enabled", tableName));
}
@@ -704,7 +917,7 @@ public final class Canary implements Tool {
* Loops over regions that owns this table, and output some information abouts the state.
*/
private static List> sniff(final Admin admin, final Sink sink,
- HTableDescriptor tableDesc, ExecutorService executor) throws Exception {
+ HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType) throws Exception {
Table table = null;
try {
table = admin.getConnection().getTable(tableDesc.getTableName());
@@ -714,7 +927,7 @@ public final class Canary implements Tool {
List tasks = new ArrayList();
try {
for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
- tasks.add(new RegionTask(admin.getConnection(), region, sink));
+ tasks.add(new RegionTask(admin.getConnection(), region, sink, taskType));
}
} finally {
table.close();
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index b8018b6661d..a4dbccb0a4e 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -92,6 +92,8 @@ Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...]
which means the region/regionserver is regular expression pattern
-f stop whole program if first error occurs, default is true
-t timeout for a check, default is 600000 (milliseconds)
+ -writeSniffing enable the write sniffing in canary
+ -writeTable The table used for write sniffing. Default is hbase:canary
----
This tool will return non zero error codes to user for collaborating with other monitoring tools, such as Nagios.
@@ -193,6 +195,25 @@ This run sets the timeout value to 60 seconds, the default value is 600 seconds.
$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000
----
+==== Enable write sniffing in canary
+
+By default, the canary tool only checks read operations, so it is hard to find problems in the
+write path. To enable write sniffing, run the canary with the `-writeSniffing` option.
+When write sniffing is enabled, the canary tool will create an HBase table and make sure the
+regions of the table are distributed on all region servers. In each sniffing period, the canary
+will try to put data to these regions to check the write availability of each region server.
+----
+$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing
+----
+
+The default write table is `hbase:canary` and can be specified by the option `-writeTable`.
+----
+$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -writeSniffing -writeTable ns:canary
+----
+
+The default value size of each put is 10 bytes and you can set it by the config key:
+`hbase.canary.write.value.size`.
+
==== Running Canary in a Kerberos-enabled Cluster
To run Canary in a Kerberos-enabled cluster, configure the following two properties in _hbase-site.xml_: