From f71f1cdfa0c7e57b74521f92179bf0d7eb8ab270 Mon Sep 17 00:00:00 2001 From: niuyulin Date: Wed, 3 Jun 2020 09:15:10 +0800 Subject: [PATCH] HBASE-24412 Canary support check only one column family per RegionTask (#1817) Signed-off-by: Guanghao Zhang --- .../org/apache/hadoop/hbase/HConstants.java | 1 + .../apache/hadoop/hbase/tool/CanaryTool.java | 193 ++++++++++-------- .../hadoop/hbase/tool/TestCanaryTool.java | 53 ++++- 3 files changed, 161 insertions(+), 86 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 9fb4704f254..03da2cc3a66 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1455,6 +1455,7 @@ public final class HConstants { public static final String HBASE_CANARY_READ_RAW_SCAN_KEY = "hbase.canary.read.raw.enabled"; + public static final String HBASE_CANARY_READ_ALL_CF = "hbase.canary.read.all.column.family"; /** * Configuration keys for programmatic JAAS configuration for secured ZK interaction */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java index ed59ab70d6b..a261ed7c925 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java @@ -45,6 +45,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; import java.util.regex.Matcher; @@ -481,9 +482,11 @@ public class CanaryTool implements Tool, Canary { private boolean rawScanEnabled; private ServerName 
serverName; private LongAdder readWriteLatency; + private boolean readAllCF; RegionTask(Connection connection, RegionInfo region, ServerName serverName, - RegionStdOutSink sink, TaskType taskType, boolean rawScanEnabled, LongAdder rwLatency) { + RegionStdOutSink sink, TaskType taskType, boolean rawScanEnabled, LongAdder rwLatency, + boolean readAllCF) { this.connection = connection; this.region = region; this.serverName = serverName; @@ -491,20 +494,76 @@ public class CanaryTool implements Tool, Canary { this.taskType = taskType; this.rawScanEnabled = rawScanEnabled; this.readWriteLatency = rwLatency; + this.readAllCF = readAllCF; } @Override public Void call() { switch (taskType) { - case READ: - return read(); - case WRITE: - return write(); - default: - return read(); + case READ: + return read(); + case WRITE: + return write(); + default: + return read(); } } + private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { + byte[] startKey = null; + Get get = null; + Scan scan = null; + ResultScanner rs = null; + StopWatch stopWatch = new StopWatch(); + startKey = region.getStartKey(); + // Can't do a get on empty start row so do a Scan of first element if any instead. 
+ if (startKey.length > 0) { + get = new Get(startKey); + get.setCacheBlocks(false); + get.setFilter(new FirstKeyOnlyFilter()); + get.addFamily(column.getName()); + } else { + scan = new Scan(); + LOG.debug("rawScan {} for {}", rawScanEnabled, region.getTable()); + scan.setRaw(rawScanEnabled); + scan.setCaching(1); + scan.setCacheBlocks(false); + scan.setFilter(new FirstKeyOnlyFilter()); + scan.addFamily(column.getName()); + scan.setMaxResultSize(1L); + scan.setOneRowLimit(); + } + LOG.debug("Reading from {} {} {} {}", region.getTable(), region.getRegionNameAsString(), + column.getNameAsString(), Bytes.toStringBinary(startKey)); + try { + stopWatch.start(); + if (startKey.length > 0) { + table.get(get); + } else { + rs = table.getScanner(scan); + rs.next(); + } + stopWatch.stop(); + this.readWriteLatency.add(stopWatch.getTime()); + sink.publishReadTiming(serverName, region, column, stopWatch.getTime()); + } catch (Exception e) { + sink.publishReadFailure(serverName, region, column, e); + sink.updateReadFailures(region.getRegionNameAsString(), + serverName == null ? "NULL" : serverName.getHostname()); + } finally { + if (rs != null) { + rs.close(); + } + } + return null; + } + + private ColumnFamilyDescriptor randomPickOneColumnFamily(ColumnFamilyDescriptor[] cfs) { + int size = cfs.length; + return cfs[ThreadLocalRandom.current().nextInt(size)]; + + } + public Void read() { Table table = null; TableDescriptor tableDesc = null; @@ -525,56 +584,12 @@ public class CanaryTool implements Tool, Canary { return null; } - byte[] startKey = null; - Get get = null; - Scan scan = null; - ResultScanner rs = null; - StopWatch stopWatch = new StopWatch(); - for (ColumnFamilyDescriptor column : tableDesc.getColumnFamilies()) { - stopWatch.reset(); - startKey = region.getStartKey(); - // Can't do a get on empty start row so do a Scan of first element if any instead. 
- if (startKey.length > 0) { - get = new Get(startKey); - get.setCacheBlocks(false); - get.setFilter(new FirstKeyOnlyFilter()); - get.addFamily(column.getName()); - } else { - scan = new Scan(); - LOG.debug("rawScan {} for {}", rawScanEnabled, tableDesc.getTableName()); - scan.setRaw(rawScanEnabled); - scan.setCaching(1); - scan.setCacheBlocks(false); - scan.setFilter(new FirstKeyOnlyFilter()); - scan.addFamily(column.getName()); - scan.setMaxResultSize(1L); - scan.setOneRowLimit(); - } - LOG.debug("Reading from {} {} {} {}", tableDesc.getTableName(), - region.getRegionNameAsString(), column.getNameAsString(), - Bytes.toStringBinary(startKey)); - try { - stopWatch.start(); - if (startKey.length > 0) { - table.get(get); - } else { - rs = table.getScanner(scan); - rs.next(); - } - stopWatch.stop(); - this.readWriteLatency.add(stopWatch.getTime()); - sink.publishReadTiming(serverName, region, column, stopWatch.getTime()); - } catch (Exception e) { - sink.publishReadFailure(serverName, region, column, e); - sink.updateReadFailures(region.getRegionNameAsString(), - serverName == null? 
"NULL": serverName.getHostname()); - } finally { - if (rs != null) { - rs.close(); - } - scan = null; - get = null; + if (readAllCF) { + for (ColumnFamilyDescriptor column : tableDesc.getColumnFamilies()) { + readColumnFamily(table, column); } + } else { + readColumnFamily(table, randomPickOneColumnFamily(tableDesc.getColumnFamilies())); } try { table.close(); @@ -621,7 +636,7 @@ public class CanaryTool implements Tool, Canary { table.close(); } catch (IOException e) { sink.publishWriteFailure(serverName, region, e); - sink.updateWriteFailures(region.getRegionNameAsString(), serverName.getHostname() ); + sink.updateWriteFailures(region.getRegionNameAsString(), serverName.getHostname()); } return null; } @@ -818,18 +833,15 @@ public class CanaryTool implements Tool, Canary { long permittedFailures = 0; boolean regionServerAllRegions = false, writeSniffing = false; String readTableTimeoutsStr = null; - // Process command line args for (int i = 0; i < args.length; i++) { String cmd = args[i]; - if (cmd.startsWith("-")) { if (index >= 0) { // command line args must be in the form: [opts] [table 1 [table 2 ...]] System.err.println("Invalid command line options"); printUsageAndExit(); } - if (cmd.equals("-help") || cmd.equals("-h")) { // user asked for help, print the help and quit. 
printUsageAndExit(); @@ -844,7 +856,6 @@ public class CanaryTool implements Tool, Canary { System.err.println("-interval takes a numeric seconds value argument."); printUsageAndExit(); } - try { interval = Long.parseLong(args[i]) * 1000; } catch (NumberFormatException e) { @@ -905,7 +916,6 @@ public class CanaryTool implements Tool, Canary { conf.set(HBASE_CANARY_REGION_WRITE_TABLE_NAME, args[i]); } else if (cmd.equals("-f")) { i++; - if (i == args.length) { System.err .println("-f needs a boolean value argument (true|false)."); @@ -915,7 +925,6 @@ public class CanaryTool implements Tool, Canary { conf.setBoolean(HBASE_CANARY_FAIL_ON_ERROR, Boolean.parseBoolean(args[i])); } else if (cmd.equals("-readTableTimeouts")) { i++; - if (i == args.length) { System.err.println("-readTableTimeouts needs a comma-separated list of read " + "millisecond timeouts per table (without spaces)."); @@ -1048,7 +1057,9 @@ public class CanaryTool implements Tool, Canary { return monitor.errorCode; } } finally { - if (monitor != null) monitor.close(); + if (monitor != null) { + monitor.close(); + } } Thread.sleep(interval); @@ -1316,8 +1327,8 @@ public class CanaryTool implements Tool, Canary { if (errorCode != 0) { return true; } - if (treatFailureAsError && - (sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) { + if (treatFailureAsError && (sink.getReadFailureCount() > allowedFailures + || sink.getWriteFailureCount() > allowedFailures)) { LOG.error("Too many failures detected, treating failure as error, failing the Canary."); errorCode = FAILURE_EXIT_CODE; return true; @@ -1327,12 +1338,16 @@ public class CanaryTool implements Tool, Canary { @Override public void close() throws IOException { - if (this.admin != null) this.admin.close(); + if (this.admin != null) { + this.admin.close(); + } } protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink, ExecutorService executor, boolean treatFailureAsError, 
long allowedFailures) { - if (null == connection) throw new IllegalArgumentException("connection shall not be null"); + if (null == connection) { + throw new IllegalArgumentException("connection shall not be null"); + } this.connection = connection; this.targets = monitorTargets; @@ -1379,6 +1394,7 @@ public class CanaryTool implements Tool, Canary { private float regionsUpperLimit; private int checkPeriod; private boolean rawScanEnabled; + private boolean readAllCF; /** * This is a timeout per table. If read of each region in the table aggregated takes longer @@ -1410,6 +1426,7 @@ public class CanaryTool implements Tool, Canary { this.rawScanEnabled = conf.getBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, false); this.configuredReadTableTimeouts = new HashMap<>(configuredReadTableTimeouts); this.configuredWriteTableTimeout = configuredWriteTableTimeout; + this.readAllCF = conf.getBoolean(HConstants.HBASE_CANARY_READ_ALL_CF, true); } private RegionStdOutSink getSink() { @@ -1441,7 +1458,7 @@ public class CanaryTool implements Tool, Canary { for (String table : tables) { LongAdder readLatency = regionSink.initializeAndGetReadLatencyForTable(table); taskFutures.addAll(CanaryTool.sniff(admin, regionSink, table, executor, TaskType.READ, - this.rawScanEnabled, readLatency)); + this.rawScanEnabled, readLatency, readAllCF)); } } else { taskFutures.addAll(sniff(TaskType.READ, regionSink)); @@ -1459,8 +1476,9 @@ public class CanaryTool implements Tool, Canary { // sniff canary table with write operation regionSink.initializeWriteLatency(); LongAdder writeTableLatency = regionSink.getWriteLatency(); - taskFutures.addAll(CanaryTool.sniff(admin, regionSink, admin.getDescriptor(writeTableName), - executor, TaskType.WRITE, this.rawScanEnabled, writeTableLatency)); + taskFutures + .addAll(CanaryTool.sniff(admin, regionSink, admin.getDescriptor(writeTableName), + executor, TaskType.WRITE, this.rawScanEnabled, writeTableLatency, readAllCF)); } for (Future future : taskFutures) 
{ @@ -1492,7 +1510,8 @@ public class CanaryTool implements Tool, Canary { long actualWriteLatency = regionSink.getWriteLatency().longValue(); LOG.info("Write operation for {} took {}ms. Configured write timeout {}ms.", writeTableStringName, actualWriteLatency, this.configuredWriteTableTimeout); - // Check that the writeTable write operation latency does not exceed the configured timeout. + // Check that the writeTable write operation latency does not exceed the configured + // timeout. if (actualWriteLatency > this.configuredWriteTableTimeout) { LOG.error("Write operation for {} exceeded the configured write timeout.", writeTableStringName); @@ -1564,8 +1583,8 @@ public class CanaryTool implements Tool, Canary { (!td.getTableName().equals(writeTableName))) { LongAdder readLatency = regionSink.initializeAndGetReadLatencyForTable(td.getTableName().getNameAsString()); - taskFutures.addAll(CanaryTool.sniff(admin, sink, td, executor, taskType, this.rawScanEnabled, - readLatency)); + taskFutures.addAll(CanaryTool.sniff(admin, sink, td, executor, taskType, + this.rawScanEnabled, readLatency, readAllCF)); } } return taskFutures; @@ -1628,15 +1647,15 @@ public class CanaryTool implements Tool, Canary { /** * Canary entry point for specified table. 
- * @throws Exception + * @throws Exception exception */ private static List> sniff(final Admin admin, final Sink sink, String tableName, - ExecutorService executor, TaskType taskType, boolean rawScanEnabled, LongAdder readLatency) - throws Exception { + ExecutorService executor, TaskType taskType, boolean rawScanEnabled, LongAdder readLatency, + boolean readAllCF) throws Exception { LOG.debug("Checking table is enabled and getting table descriptor for table {}", tableName); if (admin.isTableEnabled(TableName.valueOf(tableName))) { return CanaryTool.sniff(admin, sink, admin.getDescriptor(TableName.valueOf(tableName)), - executor, taskType, rawScanEnabled, readLatency); + executor, taskType, rawScanEnabled, readLatency, readAllCF); } else { LOG.warn("Table {} is not enabled", tableName); } @@ -1648,7 +1667,7 @@ public class CanaryTool implements Tool, Canary { */ private static List> sniff(final Admin admin, final Sink sink, TableDescriptor tableDesc, ExecutorService executor, TaskType taskType, - boolean rawScanEnabled, LongAdder rwLatency) throws Exception { + boolean rawScanEnabled, LongAdder rwLatency, boolean readAllCF) throws Exception { LOG.debug("Reading list of regions for table {}", tableDesc.getTableName()); try (Table table = admin.getConnection().getTable(tableDesc.getTableName())) { List tasks = new ArrayList<>(); @@ -1662,7 +1681,7 @@ public class CanaryTool implements Tool, Canary { ServerName rs = location.getServerName(); RegionInfo region = location.getRegion(); tasks.add(new RegionTask(admin.getConnection(), region, rs, (RegionStdOutSink)sink, - taskType, rawScanEnabled, rwLatency)); + taskType, rawScanEnabled, rwLatency, readAllCF)); Map> regionMap = ((RegionStdOutSink) sink).getRegionMap(); regionMap.put(region.getRegionNameAsString(), new ArrayList()); } @@ -1696,9 +1715,10 @@ public class CanaryTool implements Tool, Canary { hosts.add(server.toString()); } if (allowedFailures > (hosts.size() - 1) / 2) { - LOG.warn("Confirm allowable number of 
failed ZooKeeper nodes, as quorum will " + - "already be lost. Setting of {} failures is unexpected for {} ensemble size.", - allowedFailures, hosts.size()); + LOG.warn( + "Confirm allowable number of failed ZooKeeper nodes, as quorum will " + + "already be lost. Setting of {} failures is unexpected for {} ensemble size.", + allowedFailures, hosts.size()); } } @@ -1791,7 +1811,9 @@ public class CanaryTool implements Tool, Canary { return false; } - if (this.targets == null || this.targets.length == 0) return true; + if (this.targets == null || this.targets.length == 0) { + return true; + } for (String target : this.targets) { for (TableName tableName : tableNames) { @@ -1809,7 +1831,8 @@ public class CanaryTool implements Tool, Canary { return foundTableNames.isEmpty(); } - private void monitorRegionServers(Map> rsAndRMap, RegionServerStdOutSink regionServerSink) { + private void monitorRegionServers(Map> rsAndRMap, + RegionServerStdOutSink regionServerSink) { List tasks = new ArrayList<>(); Map successMap = new HashMap<>(); Random rand = new Random(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java index 9680502bf6c..d87fc3e2a85 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java @@ -35,7 +35,10 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.ServerName; @@ -129,6 +132,53 @@ public class TestCanaryTool { isA(ColumnFamilyDescriptor.class), anyLong()); } + 
@Test + public void testCanaryRegionTaskReadAllCF() throws Exception { + final TableName tableName = TableName.valueOf(name.getMethodName()); + Table table = testingUtility.createTable(tableName, + new byte[][] { Bytes.toBytes("f1"), Bytes.toBytes("f2") }); + // insert some test rows + for (int i = 0; i < 1000; i++) { + byte[] iBytes = Bytes.toBytes(i); + Put p = new Put(iBytes); + p.addColumn(Bytes.toBytes("f1"), COLUMN, iBytes); + p.addColumn(Bytes.toBytes("f2"), COLUMN, iBytes); + table.put(p); + } + Configuration configuration = HBaseConfiguration.create(testingUtility.getConfiguration()); + String[] args = { "-t", "10000", "testCanaryRegionTaskReadAllCF" }; + ExecutorService executor = new ScheduledThreadPoolExecutor(1); + for (boolean readAllCF : new boolean[] { true, false }) { + CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); + CanaryTool canary = new CanaryTool(executor, sink); + configuration.setBoolean(HConstants.HBASE_CANARY_READ_ALL_CF, readAllCF); + assertEquals(0, ToolRunner.run(configuration, canary, args)); + // the test table has two column family. If readAllCF set true, + // we expect read count is double of region count + int expectedReadCount = + readAllCF ? 
2 * sink.getTotalExpectedRegions() : sink.getTotalExpectedRegions(); + assertEquals("canary region success count should equal total expected read count", + expectedReadCount, sink.getReadSuccessCount()); + Map> regionMap = sink.getRegionMap(); + assertFalse("verify region map has size > 0", regionMap.isEmpty()); + + for (String regionName : regionMap.keySet()) { + for (CanaryTool.RegionTaskResult res : regionMap.get(regionName)) { + assertNotNull("verify getRegionNameAsString()", regionName); + assertNotNull("verify getRegionInfo()", res.getRegionInfo()); + assertNotNull("verify getTableName()", res.getTableName()); + assertNotNull("verify getTableNameAsString()", res.getTableNameAsString()); + assertNotNull("verify getServerName()", res.getServerName()); + assertNotNull("verify getServerNameAsString()", res.getServerNameAsString()); + assertNotNull("verify getColumnFamily()", res.getColumnFamily()); + assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString()); + assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess()); + assertTrue("read took some time", res.getReadLatency() > -1); + } + } + } + } + @Test public void testCanaryRegionTaskResult() throws Exception { TableName tableName = TableName.valueOf("testCanaryRegionTaskResult"); @@ -191,7 +241,8 @@ public class TestCanaryTool { // mockAppender.doAppend( // // ); - // -> at org.apache.hadoop.hbase.tool.TestCanaryTool.testReadTableTimeouts(TestCanaryTool.java:216) + // -> at org.apache.hadoop.hbase.tool.TestCanaryTool + // .testReadTableTimeouts(TestCanaryTool.java:216) // Actual invocations have different arguments: // mockAppender.doAppend( // org.apache.log4j.spi.LoggingEvent@2055cfc1