HBASE-17965 Canary tool should print the regionserver name on failure
Signed-off-by: Andrew Purtell <apurtell@apache.org>
Conflicts: hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
This commit is contained in:
parent
defc25c6d1
commit
78c64c360f
|
@@ -114,15 +114,15 @@ public final class Canary implements Tool {
|
||||||
public interface Sink {
|
public interface Sink {
|
||||||
public long getReadFailureCount();
|
public long getReadFailureCount();
|
||||||
public long incReadFailureCount();
|
public long incReadFailureCount();
|
||||||
public void publishReadFailure(HRegionInfo region, Exception e);
|
public void publishReadFailure(ServerName serverName, HRegionInfo region, Exception e);
|
||||||
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
|
public void publishReadFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e);
|
||||||
public void updateReadFailedHostList(HRegionInfo region, String serverName);
|
public void updateReadFailedHostList(HRegionInfo region, String serverName);
|
||||||
public Map<String,String> getReadFailures();
|
public Map<String,String> getReadFailures();
|
||||||
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
|
public void publishReadTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime);
|
||||||
public long getWriteFailureCount();
|
public long getWriteFailureCount();
|
||||||
public void publishWriteFailure(HRegionInfo region, Exception e);
|
public void publishWriteFailure(ServerName serverName, HRegionInfo region, Exception e);
|
||||||
public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
|
public void publishWriteFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e);
|
||||||
public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
|
public void publishWriteTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime);
|
||||||
public void updateWriteFailedHostList(HRegionInfo region, String serverName);
|
public void updateWriteFailedHostList(HRegionInfo region, String serverName);
|
||||||
public Map<String,String> getWriteFailures();
|
public Map<String,String> getWriteFailures();
|
||||||
}
|
}
|
||||||
|
@@ -153,16 +153,16 @@ public final class Canary implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void publishReadFailure(HRegionInfo region, Exception e) {
|
public void publishReadFailure(ServerName serverName, HRegionInfo region, Exception e) {
|
||||||
readFailureCount.incrementAndGet();
|
readFailureCount.incrementAndGet();
|
||||||
LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
|
LOG.error(String.format("read from region %s on regionserver %s failed", region.getRegionNameAsString(), serverName), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
|
public void publishReadFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e) {
|
||||||
readFailureCount.incrementAndGet();
|
readFailureCount.incrementAndGet();
|
||||||
LOG.error(String.format("read from region %s column family %s failed",
|
LOG.error(String.format("read from region %s on regionserver %s column family %s failed",
|
||||||
region.getRegionNameAsString(), column.getNameAsString()), e);
|
region.getRegionNameAsString(), serverName, column.getNameAsString()), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -171,9 +171,9 @@ public final class Canary implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
|
public void publishReadTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime) {
|
||||||
LOG.info(String.format("read from region %s column family %s in %dms",
|
LOG.info(String.format("read from region %s on regionserver %s column family %s in %dms",
|
||||||
region.getRegionNameAsString(), column.getNameAsString(), msTime));
|
region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -192,22 +192,22 @@ public final class Canary implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void publishWriteFailure(HRegionInfo region, Exception e) {
|
public void publishWriteFailure(ServerName serverName, HRegionInfo region, Exception e) {
|
||||||
writeFailureCount.incrementAndGet();
|
writeFailureCount.incrementAndGet();
|
||||||
LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e);
|
LOG.error(String.format("write to region %s on regionserver %s failed", region.getRegionNameAsString(), serverName), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
|
public void publishWriteFailure(ServerName serverName, HRegionInfo region, HColumnDescriptor column, Exception e) {
|
||||||
writeFailureCount.incrementAndGet();
|
writeFailureCount.incrementAndGet();
|
||||||
LOG.error(String.format("write to region %s column family %s failed",
|
LOG.error(String.format("write to region %s on regionserver %s column family %s failed",
|
||||||
region.getRegionNameAsString(), column.getNameAsString()), e);
|
region.getRegionNameAsString(), serverName, column.getNameAsString()), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
|
public void publishWriteTiming(ServerName serverName, HRegionInfo region, HColumnDescriptor column, long msTime) {
|
||||||
LOG.info(String.format("write to region %s column family %s in %dms",
|
LOG.info(String.format("write to region %s on regionserver %s column family %s in %dms",
|
||||||
region.getRegionNameAsString(), column.getNameAsString(), msTime));
|
region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -330,7 +330,7 @@ public final class Canary implements Tool {
|
||||||
tableDesc = table.getTableDescriptor();
|
tableDesc = table.getTableDescriptor();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.debug("sniffRegion failed", e);
|
LOG.debug("sniffRegion failed", e);
|
||||||
sink.publishReadFailure(region, e);
|
sink.publishReadFailure(serverName, region, e);
|
||||||
if (table != null) {
|
if (table != null) {
|
||||||
try {
|
try {
|
||||||
table.close();
|
table.close();
|
||||||
|
@@ -384,9 +384,9 @@ public final class Canary implements Tool {
|
||||||
rs.next();
|
rs.next();
|
||||||
}
|
}
|
||||||
stopWatch.stop();
|
stopWatch.stop();
|
||||||
sink.publishReadTiming(region, column, stopWatch.getTime());
|
sink.publishReadTiming(serverName, region, column, stopWatch.getTime());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
sink.publishReadFailure(region, column, e);
|
sink.publishReadFailure(serverName, region, column, e);
|
||||||
sink.updateReadFailedHostList(region, serverName.getHostname());
|
sink.updateReadFailedHostList(region, serverName.getHostname());
|
||||||
} finally {
|
} finally {
|
||||||
if (rs != null) {
|
if (rs != null) {
|
||||||
|
@@ -436,14 +436,14 @@ public final class Canary implements Tool {
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
table.put(put);
|
table.put(put);
|
||||||
long time = System.currentTimeMillis() - startTime;
|
long time = System.currentTimeMillis() - startTime;
|
||||||
sink.publishWriteTiming(region, column, time);
|
sink.publishWriteTiming(serverName, region, column, time);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
sink.publishWriteFailure(region, column, e);
|
sink.publishWriteFailure(serverName, region, column, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
table.close();
|
table.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
sink.publishWriteFailure(region, e);
|
sink.publishWriteFailure(serverName, region, e);
|
||||||
sink.updateWriteFailedHostList(region, serverName.getHostname());
|
sink.updateWriteFailedHostList(region, serverName.getHostname());
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
@@ -1168,65 +1168,6 @@ public final class Canary implements Tool {
|
||||||
return executor.invokeAll(tasks);
|
return executor.invokeAll(tasks);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* For each column family of the region tries to get one row and outputs the latency, or the
|
|
||||||
* failure.
|
|
||||||
*/
|
|
||||||
private static void sniffRegion(
|
|
||||||
final Admin admin,
|
|
||||||
final Sink sink,
|
|
||||||
HRegionInfo region,
|
|
||||||
Table table) throws Exception {
|
|
||||||
HTableDescriptor tableDesc = table.getTableDescriptor();
|
|
||||||
byte[] startKey = null;
|
|
||||||
Get get = null;
|
|
||||||
Scan scan = null;
|
|
||||||
ResultScanner rs = null;
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
|
|
||||||
stopWatch.reset();
|
|
||||||
startKey = region.getStartKey();
|
|
||||||
// Can't do a get on empty start row so do a Scan of first element if any instead.
|
|
||||||
if (startKey.length > 0) {
|
|
||||||
get = new Get(startKey);
|
|
||||||
get.setCacheBlocks(false);
|
|
||||||
get.setFilter(new FirstKeyOnlyFilter());
|
|
||||||
get.addFamily(column.getName());
|
|
||||||
} else {
|
|
||||||
scan = new Scan();
|
|
||||||
scan.setRaw(true);
|
|
||||||
scan.setCaching(1);
|
|
||||||
scan.setCacheBlocks(false);
|
|
||||||
scan.setFilter(new FirstKeyOnlyFilter());
|
|
||||||
scan.addFamily(column.getName());
|
|
||||||
scan.setMaxResultSize(1L);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (startKey.length > 0) {
|
|
||||||
stopWatch.start();
|
|
||||||
table.get(get);
|
|
||||||
stopWatch.stop();
|
|
||||||
sink.publishReadTiming(region, column, stopWatch.getTime());
|
|
||||||
} else {
|
|
||||||
stopWatch.start();
|
|
||||||
rs = table.getScanner(scan);
|
|
||||||
stopWatch.stop();
|
|
||||||
sink.publishReadTiming(region, column, stopWatch.getTime());
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
sink.publishReadFailure(region, column, e);
|
|
||||||
} finally {
|
|
||||||
if (rs != null) {
|
|
||||||
rs.close();
|
|
||||||
}
|
|
||||||
scan = null;
|
|
||||||
get = null;
|
|
||||||
startKey = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// monitor for zookeeper mode
|
// monitor for zookeeper mode
|
||||||
private static class ZookeeperMonitor extends Monitor {
|
private static class ZookeeperMonitor extends Monitor {
|
||||||
private List<String> hosts;
|
private List<String> hosts;
|
||||||
|
|
|
@@ -1,5 +1,5 @@
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
import org.apache.hadoop.hbase.client.Put;
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
|
@@ -116,7 +117,7 @@ public class TestCanaryTool {
|
||||||
ToolRunner.run(testingUtility.getConfiguration(), canary, args);
|
ToolRunner.run(testingUtility.getConfiguration(), canary, args);
|
||||||
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
|
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
|
||||||
assertEquals("verify no write error count", 0, canary.getWriteFailures().size());
|
assertEquals("verify no write error count", 0, canary.getWriteFailures().size());
|
||||||
verify(sink, atLeastOnce()).publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong());
|
verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong());
|
||||||
}
|
}
|
||||||
|
|
||||||
//no table created, so there should be no regions
|
//no table created, so there should be no regions
|
||||||
|
@@ -164,7 +165,7 @@ public class TestCanaryTool {
|
||||||
conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true);
|
conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true);
|
||||||
ToolRunner.run(conf, canary, args);
|
ToolRunner.run(conf, canary, args);
|
||||||
verify(sink, atLeastOnce())
|
verify(sink, atLeastOnce())
|
||||||
.publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong());
|
.publishReadTiming(isA(ServerName.class), isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong());
|
||||||
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
|
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue