HBASE-16399 Provide an API to get list of failed regions and servername in Canary (Vishal Khandelwal)

This commit is contained in:
Andrew Purtell 2016-09-02 10:13:35 -07:00
parent 0b6eccf4c3
commit 5e905c02d9
2 changed files with 67 additions and 13 deletions

View File

@ -39,6 +39,7 @@ import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future; import java.util.concurrent.Future;
@ -115,11 +116,15 @@ public final class Canary implements Tool {
public long incReadFailureCount(); public long incReadFailureCount();
public void publishReadFailure(HRegionInfo region, Exception e); public void publishReadFailure(HRegionInfo region, Exception e);
public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
public void updateReadFailedHostList(HRegionInfo region, String serverName);
public Map<String,String> getReadFailures();
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
public long getWriteFailureCount(); public long getWriteFailureCount();
public void publishWriteFailure(HRegionInfo region, Exception e); public void publishWriteFailure(HRegionInfo region, Exception e);
public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e); public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime); public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
public void updateWriteFailedHostList(HRegionInfo region, String serverName);
public Map<String,String> getWriteFailures();
} }
// new extended sink for output regionserver mode info // new extended sink for output regionserver mode info
// do not change the Sink interface directly due to maintaining the API // do not change the Sink interface directly due to maintaining the API
@ -134,6 +139,9 @@ public final class Canary implements Tool {
private AtomicLong readFailureCount = new AtomicLong(0), private AtomicLong readFailureCount = new AtomicLong(0),
writeFailureCount = new AtomicLong(0); writeFailureCount = new AtomicLong(0);
private Map<String, String> readFailures = new ConcurrentHashMap<String, String>();
private Map<String, String> writeFailures = new ConcurrentHashMap<String, String>();
@Override @Override
public long getReadFailureCount() { public long getReadFailureCount() {
return readFailureCount.get(); return readFailureCount.get();
@ -157,10 +165,25 @@ public final class Canary implements Tool {
region.getRegionNameAsString(), column.getNameAsString()), e); region.getRegionNameAsString(), column.getNameAsString()), e);
} }
/**
 * Records a failed read for a region by storing the supplied server name in
 * {@code readFailures}, keyed by the region's name string. A later call for the
 * same region name replaces the previously stored value.
 *
 * @param region the region whose read failed; {@code getRegionNameAsString()} is the key
 * @param serverName the value stored for the region (the read task passes a host name)
 */
@Override
public void updateReadFailedHostList(HRegionInfo region, String serverName) {
readFailures.put(region.getRegionNameAsString(), serverName);
}
@Override @Override
public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
LOG.info(String.format("read from region %s column family %s in %dms", LOG.info(String.format("read from region %s column family %s in %dms",
region.getRegionNameAsString(), column.getNameAsString(), msTime)); region.getRegionNameAsString(), column.getNameAsString(), msTime));
}
/**
 * Returns the read-failure map filled by {@code updateReadFailedHostList}:
 * region name string mapped to the server name recorded for that region.
 * The field itself is returned, not a copy.
 *
 * @return the {@code readFailures} map; empty when no read failure was recorded
 */
@Override
public Map<String, String> getReadFailures() {
return readFailures;
}
/**
 * Returns the write-failure map filled by {@code updateWriteFailedHostList}:
 * region name string mapped to the server name recorded for that region.
 * The field itself is returned, not a copy.
 *
 * @return the {@code writeFailures} map; empty when no write failure was recorded
 */
@Override
public Map<String, String> getWriteFailures() {
return writeFailures;
} }
@Override @Override
@ -186,6 +209,12 @@ public final class Canary implements Tool {
LOG.info(String.format("write to region %s column family %s in %dms", LOG.info(String.format("write to region %s column family %s in %dms",
region.getRegionNameAsString(), column.getNameAsString(), msTime)); region.getRegionNameAsString(), column.getNameAsString(), msTime));
} }
/**
 * Records a failed write for a region by storing the supplied server name in
 * {@code writeFailures}, keyed by the region's name string. A later call for the
 * same region name replaces the previously stored value.
 *
 * @param region the region whose write failed; {@code getRegionNameAsString()} is the key
 * @param serverName the value stored for the region (the write task passes a host name)
 */
@Override
public void updateWriteFailedHostList(HRegionInfo region, String serverName) {
writeFailures.put(region.getRegionNameAsString(), serverName);
}
} }
// a ExtendedSink implementation // a ExtendedSink implementation
public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink { public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
@ -265,11 +294,13 @@ public final class Canary implements Tool {
private Sink sink; private Sink sink;
private TaskType taskType; private TaskType taskType;
private boolean rawScanEnabled; private boolean rawScanEnabled;
private ServerName serverName;
RegionTask(Connection connection, HRegionInfo region, Sink sink, TaskType taskType, RegionTask(Connection connection, HRegionInfo region, ServerName serverName, Sink sink,
boolean rawScanEnabled) { TaskType taskType, boolean rawScanEnabled) {
this.connection = connection; this.connection = connection;
this.region = region; this.region = region;
this.serverName = serverName;
this.sink = sink; this.sink = sink;
this.taskType = taskType; this.taskType = taskType;
this.rawScanEnabled = rawScanEnabled; this.rawScanEnabled = rawScanEnabled;
@ -356,6 +387,7 @@ public final class Canary implements Tool {
sink.publishReadTiming(region, column, stopWatch.getTime()); sink.publishReadTiming(region, column, stopWatch.getTime());
} catch (Exception e) { } catch (Exception e) {
sink.publishReadFailure(region, column, e); sink.publishReadFailure(region, column, e);
sink.updateReadFailedHostList(region, serverName.getHostname());
} finally { } finally {
if (rs != null) { if (rs != null) {
rs.close(); rs.close();
@ -412,6 +444,7 @@ public final class Canary implements Tool {
table.close(); table.close();
} catch (IOException e) { } catch (IOException e) {
sink.publishWriteFailure(region, e); sink.publishWriteFailure(region, e);
sink.updateWriteFailedHostList(region, serverName.getHostname());
} }
return null; return null;
} }
@ -733,6 +766,14 @@ public final class Canary implements Tool {
return monitor.errorCode; return monitor.errorCode;
} }
/**
 * Returns the read failures collected by this instance's sink by delegating to
 * {@code sink.getReadFailures()}.
 *
 * @return whatever map the sink reports; for the sink shown in this change the
 *         keys are region name strings and the values are server names
 */
public Map<String, String> getReadFailures() {
return sink.getReadFailures();
}
/**
 * Returns the write failures collected by this instance's sink by delegating to
 * {@code sink.getWriteFailures()}.
 *
 * @return whatever map the sink reports; for the sink shown in this change the
 *         keys are region name strings and the values are server names
 */
public Map<String, String> getWriteFailures() {
return sink.getWriteFailures();
}
private void printUsageAndExit() { private void printUsageAndExit() {
System.err.printf( System.err.printf(
"Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n", "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
@ -1143,16 +1184,25 @@ public final class Canary implements Tool {
} catch (TableNotFoundException e) { } catch (TableNotFoundException e) {
return new ArrayList<Future<Void>>(); return new ArrayList<Future<Void>>();
} }
finally {
if (table !=null) {
table.close();
}
}
List<RegionTask> tasks = new ArrayList<RegionTask>(); List<RegionTask> tasks = new ArrayList<RegionTask>();
RegionLocator regionLocator = null;
try { try {
List<HRegionInfo> hris = admin.getTableRegions(tableDesc.getTableName()); regionLocator = admin.getConnection().getRegionLocator(tableDesc.getTableName());
if (hris != null) { for (HRegionLocation location : regionLocator.getAllRegionLocations()) {
for (HRegionInfo region : hris) { ServerName rs = location.getServerName();
tasks.add(new RegionTask(admin.getConnection(), region, sink, taskType, rawScanEnabled)); HRegionInfo region = location.getRegionInfo();
} tasks.add(new RegionTask(admin.getConnection(), region, rs, sink, taskType, rawScanEnabled));
} }
} finally { } finally {
table.close(); if (regionLocator != null) {
regionLocator.close();
}
} }
return executor.invokeAll(tasks); return executor.invokeAll(tasks);
} }

View File

@ -53,6 +53,7 @@ import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Matchers.argThat; import static org.mockito.Matchers.argThat;
import static org.mockito.Mockito.never; import static org.mockito.Mockito.never;
import static org.junit.Assert.assertEquals;
@RunWith(MockitoJUnitRunner.class) @RunWith(MockitoJUnitRunner.class)
@Category({MediumTests.class}) @Category({MediumTests.class})
@ -110,10 +111,11 @@ public class TestCanaryTool {
ExecutorService executor = new ScheduledThreadPoolExecutor(1); ExecutorService executor = new ScheduledThreadPoolExecutor(1);
Canary.RegionServerStdOutSink sink = spy(new Canary.RegionServerStdOutSink()); Canary.RegionServerStdOutSink sink = spy(new Canary.RegionServerStdOutSink());
Canary canary = new Canary(executor, sink); Canary canary = new Canary(executor, sink);
String[] args = { "-t", "10000", "testTable" }; String[] args = { "-writeSniffing", "-t", "10000", "testTable" };
ToolRunner.run(testingUtility.getConfiguration(), canary, args); ToolRunner.run(testingUtility.getConfiguration(), canary, args);
verify(sink, atLeastOnce()) assertEquals("verify no read error count", 0, canary.getReadFailures().size());
.publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); assertEquals("verify no write error count", 0, canary.getWriteFailures().size());
verify(sink, atLeastOnce()).publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong());
} }
//no table created, so there should be no regions //no table created, so there should be no regions
@ -162,6 +164,7 @@ public class TestCanaryTool {
ToolRunner.run(conf, canary, args); ToolRunner.run(conf, canary, args);
verify(sink, atLeastOnce()) verify(sink, atLeastOnce())
.publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); .publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong());
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
} }
private void runRegionserverCanary() throws Exception { private void runRegionserverCanary() throws Exception {
@ -169,6 +172,7 @@ public class TestCanaryTool {
Canary canary = new Canary(executor, new Canary.RegionServerStdOutSink()); Canary canary = new Canary(executor, new Canary.RegionServerStdOutSink());
String[] args = { "-t", "10000", "-regionserver"}; String[] args = { "-t", "10000", "-regionserver"};
ToolRunner.run(testingUtility.getConfiguration(), canary, args); ToolRunner.run(testingUtility.getConfiguration(), canary, args);
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
} }
} }