HBASE-15931 Add log for long-running tasks in AsyncProcess
This commit is contained in:
parent
0cedd8b344
commit
36bb496e48
|
@ -119,6 +119,12 @@ class AsyncProcess {
|
|||
*/
|
||||
public static final String LOG_DETAILS_FOR_BATCH_ERROR = "hbase.client.log.batcherrors.details";
|
||||
|
||||
private final int thresholdToLogUndoneTaskDetails;
|
||||
private static final String THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS =
|
||||
"hbase.client.threshold.log.details";
|
||||
private static final int DEFAULT_THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS = 10;
|
||||
private final int THRESHOLD_TO_LOG_REGION_DETAILS = 2;
|
||||
|
||||
/**
|
||||
* The context used to wait for results from one submit call.
|
||||
* 1) If AsyncProcess is set to track errors globally, and not per call (for HTable puts),
|
||||
|
@ -330,6 +336,10 @@ class AsyncProcess {
|
|||
this.rpcCallerFactory = rpcCaller;
|
||||
this.rpcFactory = rpcFactory;
|
||||
this.logBatchErrorDetails = conf.getBoolean(LOG_DETAILS_FOR_BATCH_ERROR, false);
|
||||
|
||||
this.thresholdToLogUndoneTaskDetails =
|
||||
conf.getInt(THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS,
|
||||
DEFAULT_THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -387,7 +397,7 @@ class AsyncProcess {
|
|||
List<Integer> locationErrorRows = null;
|
||||
do {
|
||||
// Wait until there is at least one slot for a new task.
|
||||
waitForMaximumCurrentTasks(maxTotalConcurrentTasks - 1);
|
||||
waitForMaximumCurrentTasks(maxTotalConcurrentTasks - 1, tableName.getNameAsString());
|
||||
|
||||
// Remember the previous decisions about regions or region servers we put in the
|
||||
// final multi.
|
||||
|
@ -1765,18 +1775,19 @@ class AsyncProcess {
|
|||
@VisibleForTesting
|
||||
/** Waits until all outstanding tasks are done. Used in tests. */
|
||||
void waitUntilDone() throws InterruptedIOException {
|
||||
waitForMaximumCurrentTasks(0);
|
||||
waitForMaximumCurrentTasks(0, null);
|
||||
}
|
||||
|
||||
/** Wait until the async does not have more than max tasks in progress. */
|
||||
private void waitForMaximumCurrentTasks(int max) throws InterruptedIOException {
|
||||
waitForMaximumCurrentTasks(max, tasksInProgress, id);
|
||||
private void waitForMaximumCurrentTasks(int max, String tableName)
|
||||
throws InterruptedIOException {
|
||||
waitForMaximumCurrentTasks(max, tasksInProgress, id, tableName);
|
||||
}
|
||||
|
||||
// Break out this method so testable
|
||||
@VisibleForTesting
|
||||
static void waitForMaximumCurrentTasks(int max, final AtomicLong tasksInProgress, final long id)
|
||||
throws InterruptedIOException {
|
||||
void waitForMaximumCurrentTasks(int max, final AtomicLong tasksInProgress, final long id,
|
||||
String tableName) throws InterruptedIOException {
|
||||
long lastLog = EnvironmentEdgeManager.currentTime();
|
||||
long currentInProgress, oldInProgress = Long.MAX_VALUE;
|
||||
while ((currentInProgress = tasksInProgress.get()) > max) {
|
||||
|
@ -1785,7 +1796,11 @@ class AsyncProcess {
|
|||
if (now > lastLog + 10000) {
|
||||
lastLog = now;
|
||||
LOG.info("#" + id + ", waiting for some tasks to finish. Expected max="
|
||||
+ max + ", tasksInProgress=" + currentInProgress);
|
||||
+ max + ", tasksInProgress=" + currentInProgress +
|
||||
" hasError=" + hasError() + tableName == null ? "" : ", tableName=" + tableName);
|
||||
if (currentInProgress <= thresholdToLogUndoneTaskDetails) {
|
||||
logDetailsOfUndoneTasks(currentInProgress);
|
||||
}
|
||||
}
|
||||
}
|
||||
oldInProgress = currentInProgress;
|
||||
|
@ -1802,6 +1817,25 @@ class AsyncProcess {
|
|||
}
|
||||
}
|
||||
|
||||
private void logDetailsOfUndoneTasks(long taskInProgress) {
|
||||
ArrayList<ServerName> servers = new ArrayList<ServerName>();
|
||||
for (Map.Entry<ServerName, AtomicInteger> entry : taskCounterPerServer.entrySet()) {
|
||||
if (entry.getValue().get() > 0) {
|
||||
servers.add(entry.getKey());
|
||||
}
|
||||
}
|
||||
LOG.info("Left over " + taskInProgress + " task(s) are processed on server(s): " + servers);
|
||||
if (taskInProgress <= THRESHOLD_TO_LOG_REGION_DETAILS) {
|
||||
ArrayList<String> regions = new ArrayList<String>();
|
||||
for (Map.Entry<byte[], AtomicInteger> entry : taskCounterPerRegion.entrySet()) {
|
||||
if (entry.getValue().get() > 0) {
|
||||
regions.add(Bytes.toString(entry.getKey()));
|
||||
}
|
||||
}
|
||||
LOG.info("Regions against which left over task(s) are processed: " + regions);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Only used w/useGlobalErrors ctor argument, for HTable backward compat.
|
||||
* @return Whether there were any errors in any request since the last time
|
||||
|
@ -1817,12 +1851,13 @@ class AsyncProcess {
|
|||
* failed operations themselves.
|
||||
* @param failedRows an optional list into which the rows that failed since the last time
|
||||
* {@link #waitForAllPreviousOpsAndReset(List)} was called, or AP was created, are saved.
|
||||
* @param tableName name of the table
|
||||
* @return all the errors since the last time {@link #waitForAllPreviousOpsAndReset(List)}
|
||||
* was called, or AP was created.
|
||||
*/
|
||||
public RetriesExhaustedWithDetailsException waitForAllPreviousOpsAndReset(
|
||||
List<Row> failedRows) throws InterruptedIOException {
|
||||
waitForMaximumCurrentTasks(0);
|
||||
List<Row> failedRows, String tableName) throws InterruptedIOException {
|
||||
waitForMaximumCurrentTasks(0, tableName);
|
||||
if (!globalErrors.hasErrors()) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -237,7 +237,8 @@ public class BufferedMutatorImpl implements BufferedMutator {
|
|||
while (!buffer.isEmpty()) {
|
||||
ap.submit(tableName, buffer, true, null, false);
|
||||
}
|
||||
RetriesExhaustedWithDetailsException error = ap.waitForAllPreviousOpsAndReset(null);
|
||||
RetriesExhaustedWithDetailsException error =
|
||||
ap.waitForAllPreviousOpsAndReset(null, tableName.getNameAsString());
|
||||
if (error != null) {
|
||||
if (listener == null) {
|
||||
throw error;
|
||||
|
|
|
@ -1139,16 +1139,17 @@ public class TestAsyncProcess {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testWaitForMaximumCurrentTasks() throws InterruptedException, BrokenBarrierException {
|
||||
public void testWaitForMaximumCurrentTasks() throws Exception {
|
||||
final AtomicLong tasks = new AtomicLong(0);
|
||||
final AtomicInteger max = new AtomicInteger(0);
|
||||
final CyclicBarrier barrier = new CyclicBarrier(2);
|
||||
final AsyncProcess ap = new MyAsyncProcess(createHConnection(), conf);
|
||||
Runnable runnable = new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
barrier.await();
|
||||
AsyncProcess.waitForMaximumCurrentTasks(max.get(), tasks, 1);
|
||||
ap.waitForMaximumCurrentTasks(max.get(), tasks, 1, null);
|
||||
} catch (InterruptedIOException e) {
|
||||
Assert.fail(e.getMessage());
|
||||
} catch (InterruptedException e) {
|
||||
|
|
Loading…
Reference in New Issue