From 78c64c360fe46c7d6da6a831a1f91f2567147a83 Mon Sep 17 00:00:00 2001 From: Karan Mehta Date: Wed, 26 Apr 2017 17:14:02 -0700 Subject: [PATCH] HBASE-17965 Canary tool should print the regionserver name on failure Signed-off-by: Andrew Purtell Conflicts: hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java --- .../org/apache/hadoop/hbase/tool/Canary.java | 115 +++++------------- .../hadoop/hbase/tool/TestCanaryTool.java | 7 +- 2 files changed, 32 insertions(+), 90 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java index c0f545d1f30..137e5da0512 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java @@ -114,15 +114,15 @@ public final class Canary implements Tool { public interface Sink { public long getReadFailureCount(); public long incReadFailureCount(); - public void publishReadFailure(HRegionInfo region, Exception e); - public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); + public void publishReadFailure(ServerName serverName, HRegionInfo region, Exception e); + public void publishReadFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e); public void updateReadFailedHostList(HRegionInfo region, String serverName); public Map getReadFailures(); - public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); + public void publishReadTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime); public long getWriteFailureCount(); - public void publishWriteFailure(HRegionInfo region, Exception e); - public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e); - public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime); + public void 
publishWriteFailure(ServerName serverName, HRegionInfo region, Exception e); + public void publishWriteFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e); + public void publishWriteTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime); public void updateWriteFailedHostList(HRegionInfo region, String serverName); public Map getWriteFailures(); } @@ -153,16 +153,16 @@ public final class Canary implements Tool { } @Override - public void publishReadFailure(HRegionInfo region, Exception e) { + public void publishReadFailure(ServerName serverName, HRegionInfo region, Exception e) { readFailureCount.incrementAndGet(); - LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e); + LOG.error(String.format("read from region %s on regionserver %s failed", region.getRegionNameAsString(), serverName), e); } @Override - public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { + public void publishReadFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e) { readFailureCount.incrementAndGet(); - LOG.error(String.format("read from region %s column family %s failed", - region.getRegionNameAsString(), column.getNameAsString()), e); + LOG.error(String.format("read from region %s on regionserver %s column family %s failed", + region.getRegionNameAsString(), serverName, column.getNameAsString()), e); } @Override @@ -171,9 +171,9 @@ public final class Canary implements Tool { } @Override - public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { - LOG.info(String.format("read from region %s column family %s in %dms", - region.getRegionNameAsString(), column.getNameAsString(), msTime)); + public void publishReadTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime) { + LOG.info(String.format("read from region %s on regionserver %s column family %s 
in %dms", + region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime)); } @Override @@ -192,22 +192,22 @@ public final class Canary implements Tool { } @Override - public void publishWriteFailure(HRegionInfo region, Exception e) { + public void publishWriteFailure(ServerName serverName, HRegionInfo region, Exception e) { writeFailureCount.incrementAndGet(); - LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e); + LOG.error(String.format("write to region %s on regionserver %s failed", region.getRegionNameAsString(), serverName), e); } @Override - public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { + public void publishWriteFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e) { writeFailureCount.incrementAndGet(); - LOG.error(String.format("write to region %s column family %s failed", - region.getRegionNameAsString(), column.getNameAsString()), e); + LOG.error(String.format("write to region %s on regionserver %s column family %s failed", + region.getRegionNameAsString(), serverName, column.getNameAsString()), e); } @Override - public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { - LOG.info(String.format("write to region %s column family %s in %dms", - region.getRegionNameAsString(), column.getNameAsString(), msTime)); + public void publishWriteTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime) { + LOG.info(String.format("write to region %s on regionserver %s column family %s in %dms", + region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime)); } @Override @@ -330,7 +330,7 @@ public final class Canary implements Tool { tableDesc = table.getTableDescriptor(); } catch (IOException e) { LOG.debug("sniffRegion failed", e); - sink.publishReadFailure(region, e); + sink.publishReadFailure(serverName, region, e); if (table != null) { try { 
table.close(); @@ -384,9 +384,9 @@ public final class Canary implements Tool { rs.next(); } stopWatch.stop(); - sink.publishReadTiming(region, column, stopWatch.getTime()); + sink.publishReadTiming(serverName, region, column, stopWatch.getTime()); } catch (Exception e) { - sink.publishReadFailure(region, column, e); + sink.publishReadFailure(serverName, region, column, e); sink.updateReadFailedHostList(region, serverName.getHostname()); } finally { if (rs != null) { @@ -436,14 +436,14 @@ public final class Canary implements Tool { long startTime = System.currentTimeMillis(); table.put(put); long time = System.currentTimeMillis() - startTime; - sink.publishWriteTiming(region, column, time); + sink.publishWriteTiming(serverName, region, column, time); } catch (Exception e) { - sink.publishWriteFailure(region, column, e); + sink.publishWriteFailure(serverName, region, column, e); } } table.close(); } catch (IOException e) { - sink.publishWriteFailure(region, e); + sink.publishWriteFailure(serverName, region, e); sink.updateWriteFailedHostList(region, serverName.getHostname()); } return null; @@ -1168,65 +1168,6 @@ public final class Canary implements Tool { return executor.invokeAll(tasks); } - /* - * For each column family of the region tries to get one row and outputs the latency, or the - * failure. - */ - private static void sniffRegion( - final Admin admin, - final Sink sink, - HRegionInfo region, - Table table) throws Exception { - HTableDescriptor tableDesc = table.getTableDescriptor(); - byte[] startKey = null; - Get get = null; - Scan scan = null; - ResultScanner rs = null; - StopWatch stopWatch = new StopWatch(); - for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { - stopWatch.reset(); - startKey = region.getStartKey(); - // Can't do a get on empty start row so do a Scan of first element if any instead. 
- if (startKey.length > 0) { - get = new Get(startKey); - get.setCacheBlocks(false); - get.setFilter(new FirstKeyOnlyFilter()); - get.addFamily(column.getName()); - } else { - scan = new Scan(); - scan.setRaw(true); - scan.setCaching(1); - scan.setCacheBlocks(false); - scan.setFilter(new FirstKeyOnlyFilter()); - scan.addFamily(column.getName()); - scan.setMaxResultSize(1L); - } - - try { - if (startKey.length > 0) { - stopWatch.start(); - table.get(get); - stopWatch.stop(); - sink.publishReadTiming(region, column, stopWatch.getTime()); - } else { - stopWatch.start(); - rs = table.getScanner(scan); - stopWatch.stop(); - sink.publishReadTiming(region, column, stopWatch.getTime()); - } - } catch (Exception e) { - sink.publishReadFailure(region, column, e); - } finally { - if (rs != null) { - rs.close(); - } - scan = null; - get = null; - startKey = null; - } - } - } - // monitor for zookeeper mode private static class ZookeeperMonitor extends Monitor { private List hosts; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java index f910668d699..3ca91c95f2f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java @@ -1,5 +1,5 @@ /** - * + * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; @@ -116,7 +117,7 @@ public class TestCanaryTool { ToolRunner.run(testingUtility.getConfiguration(), canary, args); assertEquals("verify no read error count", 0, canary.getReadFailures().size()); assertEquals("verify no write error count", 0, canary.getWriteFailures().size()); - verify(sink, atLeastOnce()).publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); + verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); } //no table created, so there should be no regions @@ -164,7 +165,7 @@ public class TestCanaryTool { conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true); ToolRunner.run(conf, canary, args); verify(sink, atLeastOnce()) - .publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); + .publishReadTiming(isA(ServerName.class), isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); assertEquals("verify no read error count", 0, canary.getReadFailures().size()); }