HBASE-9843 Various fixes in client code

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1536865 13f79535-47bb-0310-9956-ffa450edef68
nkeywal 2013-10-29 19:45:44 +00:00
parent 3bebeca24d
commit 53bb2f6be7
7 changed files with 155 additions and 115 deletions

View File

@ -87,7 +87,9 @@ import org.cloudera.htrace.Trace;
*/
class AsyncProcess<CResult> {
private static final Log LOG = LogFactory.getLog(AsyncProcess.class);
private final static int START_LOG_ERRORS_CNT = 4;
protected static final AtomicLong COUNTER = new AtomicLong();
protected final long id;
private final int startLogErrorsCnt;
protected final HConnection hConnection;
protected final TableName tableName;
protected final ExecutorService pool;
@ -97,6 +99,7 @@ class AsyncProcess<CResult> {
protected final AtomicBoolean hasError = new AtomicBoolean(false);
protected final AtomicLong tasksSent = new AtomicLong(0);
protected final AtomicLong tasksDone = new AtomicLong(0);
protected final AtomicLong retriesCnt = new AtomicLong(0);
protected final ConcurrentMap<String, AtomicInteger> taskCounterPerRegion =
new ConcurrentHashMap<String, AtomicInteger>();
protected final ConcurrentMap<ServerName, AtomicInteger> taskCounterPerServer =
@ -121,7 +124,6 @@ class AsyncProcess<CResult> {
protected final int maxConcurrentTasksPerServer;
protected final long pause;
protected int numTries;
protected final boolean useServerTrackerForRetries;
protected int serverTrackerTimeout;
protected RpcRetryingCallerFactory rpcCallerFactory;
@ -205,6 +207,8 @@ class AsyncProcess<CResult> {
this.pool = pool;
this.callback = callback;
this.id = COUNTER.incrementAndGet();
this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE,
HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
this.numTries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
@ -217,6 +221,11 @@ class AsyncProcess<CResult> {
this.maxConcurrentTasksPerRegion = conf.getInt(HConstants.HBASE_CLIENT_MAX_PERREGION_TASKS,
HConstants.DEFAULT_HBASE_CLIENT_MAX_PERREGION_TASKS);
// A few failures are fine: the region moved, then it is not yet opened, then it is overloaded. We try
// to have an acceptable heuristic for the number of errors we don't log.
// 9 was chosen because we wait for 1s at this stage.
this.startLogErrorsCnt = conf.getInt("hbase.client.start.log.errors.counter", 9);
if (this.maxTotalConcurrentTasks <= 0) {
throw new IllegalArgumentException("maxTotalConcurrentTasks=" + maxTotalConcurrentTasks);
}
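For reference, the knobs read by this constructor are ordinary client Configuration settings. A minimal sketch of setting them programmatically follows; the values are illustrative, not recommendations, and only the keys that appear in the code above are used:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;

public class ClientRetryTuningSketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Base pause between retries and the total number of tries.
    conf.setLong(HConstants.HBASE_CLIENT_PAUSE, 100);
    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 10);
    // Per-region concurrency cap used by AsyncProcess.
    conf.setInt(HConstants.HBASE_CLIENT_MAX_PERREGION_TASKS,
        HConstants.DEFAULT_HBASE_CLIENT_MAX_PERREGION_TASKS);
    // Only start logging retry errors after this many attempts (9 is the default above).
    conf.setInt("hbase.client.start.log.errors.counter", 9);
    System.out.println(conf.getInt("hbase.client.start.log.errors.counter", -1));
  }
}
```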
@ -229,23 +238,19 @@ class AsyncProcess<CResult> {
maxConcurrentTasksPerRegion);
}
this.useServerTrackerForRetries =
conf.getBoolean(HConnectionManager.RETRIES_BY_SERVER_KEY, true);
if (this.useServerTrackerForRetries) {
// Server tracker allows us to do faster, and yet useful (hopefully), retries.
// However, if we are too useful, we might fail very quickly due to retry count limit.
// To avoid this, we are going to cheat for now (see HBASE-7659), and calculate maximum
// retry time if normal retries were used. Then we will retry until this time runs out.
// If we keep hitting one server, the net effect will be the incremental backoff, and
// essentially the same number of retries as planned. If we have to do faster retries,
// we will do more retries in aggregate, but the user will be none the wiser.
this.serverTrackerTimeout = 0;
for (int i = 0; i < this.numTries; ++i) {
serverTrackerTimeout += ConnectionUtils.getPauseTime(this.pause, i);
}
// Server tracker allows us to do faster, and yet useful (hopefully), retries.
// However, if we are too useful, we might fail very quickly due to retry count limit.
// To avoid this, we are going to cheat for now (see HBASE-7659), and calculate maximum
// retry time if normal retries were used. Then we will retry until this time runs out.
// If we keep hitting one server, the net effect will be the incremental backoff, and
// essentially the same number of retries as planned. If we have to do faster retries,
// we will do more retries in aggregate, but the user will be none the wiser.
this.serverTrackerTimeout = 0;
for (int i = 0; i < this.numTries; ++i) {
serverTrackerTimeout += ConnectionUtils.getPauseTime(this.pause, i);
}
this.rpcCallerFactory = rpcCaller;
}
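The loop above pre-computes how long the normal retry schedule would take in total; that sum becomes the server tracker's time budget. A self-contained sketch of the same calculation, using the RETRY_BACKOFF multipliers from HConstants, a simplified pause function (the real ConnectionUtils.getPauseTime may also add a small random jitter), and illustrative pause/numTries values:

```java
public class ServerTrackerTimeoutSketch {
  // Mirrors the RETRY_BACKOFF table as changed by this commit.
  static final int[] RETRY_BACKOFF = {1, 2, 3, 5, 10, 20, 40, 100, 100, 100, 100, 200, 200};

  // Simplified getPauseTime: the base pause scaled by the multiplier for this attempt.
  static long pauseTime(long pause, int tries) {
    int index = Math.min(tries, RETRY_BACKOFF.length - 1);
    return pause * RETRY_BACKOFF[index];
  }

  public static void main(String[] args) {
    long pause = 100;   // hypothetical HBASE_CLIENT_PAUSE, in ms
    int numTries = 10;  // hypothetical HBASE_CLIENT_RETRIES_NUMBER
    long serverTrackerTimeout = 0;
    for (int i = 0; i < numTries; ++i) {
      serverTrackerTimeout += pauseTime(pause, i);
    }
    // With these values: 100 * (1+2+3+5+10+20+40+100+100+100) = 38100 ms.
    System.out.println("serverTrackerTimeout = " + serverTrackerTimeout + " ms");
  }
}
```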
@ -291,7 +296,7 @@ class AsyncProcess<CResult> {
Iterator<? extends Row> it = rows.iterator();
while (it.hasNext()) {
Row r = it.next();
HRegionLocation loc = findDestLocation(r, 1, posInList);
HRegionLocation loc = findDestLocation(r, posInList);
if (loc == null) { // loc is null if there is an error such as meta not available.
it.remove();
@ -332,18 +337,17 @@ class AsyncProcess<CResult> {
* Find the destination.
*
* @param row the row
* @param numAttempt the num attempt
* @param posInList the position in the list
* @return the destination. Null if we couldn't find it.
*/
private HRegionLocation findDestLocation(Row row, int numAttempt, int posInList) {
if (row == null) throw new IllegalArgumentException("row cannot be null");
private HRegionLocation findDestLocation(Row row, int posInList) {
if (row == null) throw new IllegalArgumentException("#" + id + ", row cannot be null");
HRegionLocation loc = null;
IOException locationException = null;
try {
loc = hConnection.locateRegion(this.tableName, row.getRow());
if (loc == null) {
locationException = new IOException("No location found, aborting submit for" +
locationException = new IOException("#" + id + ", no location found, aborting submit for" +
" tableName=" + tableName +
" rowkey=" + Arrays.toString(row.getRow()));
}
@ -353,7 +357,7 @@ class AsyncProcess<CResult> {
if (locationException != null) {
// There are multiple retries in locateRegion already. No need to add new.
// We can't continue with this row, hence it's the last retry.
manageError(numAttempt, posInList, row, false, locationException, null);
manageError(posInList, row, false, locationException, null);
return null;
}
@ -460,12 +464,17 @@ class AsyncProcess<CResult> {
private void submit(List<Action<Row>> initialActions,
List<Action<Row>> currentActions, int numAttempt,
final HConnectionManager.ServerErrorTracker errorsByServer) {
if (numAttempt > 1){
retriesCnt.incrementAndGet();
}
// group per location => regions server
final Map<HRegionLocation, MultiAction<Row>> actionsByServer =
new HashMap<HRegionLocation, MultiAction<Row>>();
for (Action<Row> action : currentActions) {
HRegionLocation loc = findDestLocation(action.getAction(), 1, action.getOriginalIndex());
HRegionLocation loc = findDestLocation(action.getAction(), action.getOriginalIndex());
if (loc != null) {
addAction(loc, action, actionsByServer);
}
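The loop above resolves each action's region location and buckets it per server, so that a single multi-call goes to each region server. A minimal, hypothetical sketch of that grouping step with plain Java collections (RowOp and string server names stand in for HBase's Action&lt;Row&gt; and HRegionLocation):

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByServerSketch {
  static class RowOp {
    final String row;
    final String serverName; // where the row's region currently lives
    RowOp(String row, String serverName) {
      this.row = row;
      this.serverName = serverName;
    }
  }

  public static void main(String[] args) {
    List<RowOp> ops = new ArrayList<RowOp>();
    ops.add(new RowOp("row-a", "rs-1"));
    ops.add(new RowOp("row-b", "rs-2"));
    ops.add(new RowOp("row-c", "rs-1"));

    // One bucket per region server, so each server receives a single multi-call.
    Map<String, List<RowOp>> actionsByServer = new HashMap<String, List<RowOp>>();
    for (RowOp op : ops) {
      actionsByServer.computeIfAbsent(op.serverName, k -> new ArrayList<RowOp>()).add(op);
    }
    System.out.println(actionsByServer.keySet()); // rs-1 and rs-2 (iteration order not guaranteed)
  }
}
```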
@ -503,7 +512,8 @@ class AsyncProcess<CResult> {
try {
res = createCaller(callable).callWithoutRetries(callable);
} catch (IOException e) {
LOG.warn("Call to " + loc.getServerName() + " failed numAttempt=" + numAttempt +
LOG.warn("#" + id + ", call to " + loc.getServerName() +
" failed numAttempt=" + numAttempt +
", resubmitting all since not sure where we are at", e);
resubmitAll(initialActions, multiAction, loc, numAttempt + 1, e, errorsByServer);
return;
@ -522,7 +532,7 @@ class AsyncProcess<CResult> {
// This should never happen. But as the pool is provided by the end user, let's secure
// this a little.
decTaskCounters(multiAction.getRegions(), loc.getServerName());
LOG.warn("The task was rejected by the pool. This is unexpected." +
LOG.warn("#" + id + ", the task was rejected by the pool. This is unexpected." +
" Server is " + loc.getServerName(), ree);
// We're likely to fail again, but this will increment the attempt counter, so it will
// finish.
@ -551,7 +561,6 @@ class AsyncProcess<CResult> {
/**
* Check whether we can retry and act accordingly: log, set the error status, call the callbacks.
*
* @param numAttempt the number of this attempt
* @param originalIndex the position in the list sent
* @param row the row
* @param canRetry if false, we won't retry whatever the settings.
@ -559,13 +568,10 @@ class AsyncProcess<CResult> {
* @param location the location, if any (can be null)
* @return true if the action can be retried, false otherwise.
*/
private boolean manageError(int numAttempt, int originalIndex, Row row, boolean canRetry,
private boolean manageError(int originalIndex, Row row, boolean canRetry,
Throwable throwable, HRegionLocation location) {
if (canRetry) {
if (numAttempt >= numTries ||
(throwable != null && throwable instanceof DoNotRetryIOException)) {
canRetry = false;
}
if (canRetry && throwable != null && throwable instanceof DoNotRetryIOException) {
canRetry = false;
}
byte[] region = location == null ? null : location.getRegionInfo().getEncodedNameAsBytes();
@ -608,15 +614,14 @@ class AsyncProcess<CResult> {
List<Action<Row>> toReplay = new ArrayList<Action<Row>>(initialActions.size());
for (Map.Entry<byte[], List<Action<Row>>> e : rsActions.actions.entrySet()) {
for (Action<Row> action : e.getValue()) {
if (manageError(numAttempt, action.getOriginalIndex(), action.getAction(),
true, t, location)) {
if (manageError(action.getOriginalIndex(), action.getAction(), true, t, location)) {
toReplay.add(action);
}
}
}
if (toReplay.isEmpty()) {
LOG.warn("Attempt #" + numAttempt + "/" + numTries + " failed for all " +
LOG.warn("#" + id + ", attempt #" + numAttempt + "/" + numTries + " failed for all " +
initialActions.size() + " ops, NOT resubmitting, " + location.getServerName());
} else {
submit(initialActions, toReplay, numAttempt, errorsByServer);
@ -628,7 +633,7 @@ class AsyncProcess<CResult> {
*
* @param initialActions - the whole action list
* @param rsActions - the actions for this location
* @param location - the location
* @param location - the location. It's used as a server name.
* @param responses - the response, if any
* @param numAttempt - the attempt
*/
@ -638,8 +643,8 @@ class AsyncProcess<CResult> {
HConnectionManager.ServerErrorTracker errorsByServer) {
if (responses == null) {
LOG.info("Attempt #" + numAttempt + "/" + numTries + " failed all ops, trying resubmit," +
location);
LOG.info("#" + id + ", attempt #" + numAttempt + "/" + numTries +
" failed all ops, trying resubmit," + location);
resubmitAll(initialActions, rsActions, location, numAttempt + 1, null, errorsByServer);
return;
}
@ -670,14 +675,15 @@ class AsyncProcess<CResult> {
failureCount++;
if (!regionFailureRegistered) { // We're doing this once per location.
regionFailureRegistered= true;
// The location here is used as a server name.
hConnection.updateCachedLocations(this.tableName, row.getRow(), result, location);
if (errorsByServer != null) {
if (failureCount == 1) {
errorsByServer.reportServerError(location);
canRetry = errorsByServer.canRetryMore();
canRetry = errorsByServer.canRetryMore(numAttempt);
}
}
if (manageError(numAttempt, correspondingAction.getOriginalIndex(), row, canRetry,
if (manageError(correspondingAction.getOriginalIndex(), row, canRetry,
throwable, location)) {
toReplay.add(correspondingAction);
}
@ -694,21 +700,24 @@ class AsyncProcess<CResult> {
}
if (!toReplay.isEmpty()) {
long backOffTime = (errorsByServer != null ?
errorsByServer.calculateBackoffTime(location, pause) :
ConnectionUtils.getPauseTime(pause, numAttempt));
if (numAttempt > START_LOG_ERRORS_CNT && LOG.isDebugEnabled()) {
// We have two contradicting needs here:
// 1) We want to get the new location after having slept, as it may change.
// 2) We want to take into account the location when calculating the sleep time.
// It should be possible to have some heuristics to take the right decision. Short term,
// we go for one.
long backOffTime = errorsByServer.calculateBackoffTime(location, pause);
if (numAttempt > startLogErrorsCnt) {
// We use this value to get some logs when there are multiple failures, but not too many
// logs, as errors are to be expected when a region moves, splits, and so on.
LOG.debug("Attempt #" + numAttempt + "/" + numTries + " failed " + failureCount +
" ops , resubmitting " + toReplay.size() + ", " + location + ", last exception was: " +
(throwable == null ? "null" : throwable.getMessage()) +
", sleeping " + backOffTime + "ms");
LOG.info(createLog(numAttempt, failureCount, toReplay.size(),
location.getServerName(), throwable, backOffTime, true,
errorsByServer.getStartTrackingTime()));
}
try {
Thread.sleep(backOffTime);
} catch (InterruptedException e) {
LOG.warn("Not sent: " + toReplay.size() + " operations, " + location, e);
LOG.warn("#" + id + ", not sent: " + toReplay.size() + " operations, " + location, e);
Thread.interrupted();
return;
}
@ -717,16 +726,46 @@ class AsyncProcess<CResult> {
} else {
if (failureCount != 0) {
// We have a failure but nothing to retry. We're done, it's a final failure.
LOG.warn("Attempt #" + numAttempt + "/" + numTries + " failed for " + failureCount +
" ops on " + location.getServerName() + " NOT resubmitting. " + location);
} else if (numAttempt > START_LOG_ERRORS_CNT + 1 && LOG.isDebugEnabled()) {
LOG.warn(createLog(numAttempt, failureCount, toReplay.size(),
location.getServerName(), throwable, -1, false,
errorsByServer.getStartTrackingTime()));
} else if (numAttempt > startLogErrorsCnt + 1) {
// The operation was successful, but needed several attempts. Let's log this.
LOG.debug("Attempt #" + numAttempt + "/" + numTries + " finally suceeded, size=" +
toReplay.size());
LOG.info(createLog(numAttempt, failureCount, toReplay.size(),
location.getServerName(), throwable, -1, false,
errorsByServer.getStartTrackingTime()));
}
}
}
private String createLog(int numAttempt, int failureCount, int replaySize, ServerName sn,
Throwable error, long backOffTime, boolean willRetry, String startTime){
StringBuilder sb = new StringBuilder();
sb.append("#").append(id).append(", table=").append(tableName).
append(", Attempt #").append(numAttempt).append("/").append(numTries).append(" ");
if (failureCount > 0 || error != null){
sb.append("failed ").append(failureCount).append(" ops").append(", last exception was: ").
append(error == null ? "null" : error.getMessage());
}else {
sb.append("SUCCEEDED");
}
sb.append(" on server ").append(sn);
sb.append(", tracking started at ").append(startTime);
if (willRetry) {
sb.append(" - retrying after sleeping for ").append(backOffTime).append(" ms").
append(", will replay ").append(replaySize).append(" ops.");
} else if (failureCount > 0) {
sb.append(" - FAILED, NOT RETRYING ANYMORE");
}
return sb.toString();
}
/**
* Waits for another task to finish.
* @param currentNumberOfTask - the number of tasks finished when calling the method.
@ -738,7 +777,7 @@ class AsyncProcess<CResult> {
this.tasksDone.wait(100);
}
} catch (InterruptedException e) {
throw new InterruptedIOException("Interrupted." +
throw new InterruptedIOException("#" + id + ", interrupted." +
" currentNumberOfTask=" + currentNumberOfTask +
", tableName=" + tableName + ", tasksDone=" + tasksDone.get());
}
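AsyncProcess blocks a submitter by waiting, in 100 ms slices, on the monitor of the tasksDone counter until more tasks have completed. The sketch below illustrates that wait/notify accounting pattern in isolation; it assumes the side that marks tasks as finished notifies on the same counter object, which is a simplification rather than the exact AsyncProcess code:

```java
import java.util.concurrent.atomic.AtomicLong;

public class TaskAccountingSketch {
  private final AtomicLong tasksDone = new AtomicLong(0);

  // Called by worker threads when a task finishes.
  void taskFinished() {
    tasksDone.incrementAndGet();
    synchronized (tasksDone) {
      tasksDone.notifyAll();
    }
  }

  // Called by a submitter: block (in 100 ms slices) until more tasks are done
  // than when we started looking.
  void waitForNextTaskDone(long currentNumberOfTasks) throws InterruptedException {
    synchronized (tasksDone) {
      while (tasksDone.get() <= currentNumberOfTasks) {
        tasksDone.wait(100);
      }
    }
  }

  public static void main(String[] args) throws InterruptedException {
    TaskAccountingSketch sketch = new TaskAccountingSketch();
    new Thread(() -> {
      try { Thread.sleep(50); } catch (InterruptedException ignored) { }
      sketch.taskFinished();
    }).start();
    sketch.waitForNextTaskDone(0);
    System.out.println("tasksDone=" + sketch.tasksDone.get());
  }
}
```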
@ -756,9 +795,10 @@ class AsyncProcess<CResult> {
long now = EnvironmentEdgeManager.currentTimeMillis();
if (now > lastLog + 10000) {
lastLog = now;
LOG.info(": Waiting for the global number of running tasks to be equals or less than "
LOG.info("#" + id + ", waiting for some tasks to finish. Expected max="
+ max + ", tasksSent=" + tasksSent.get() + ", tasksDone=" + tasksDone.get() +
", currentTasksDone=" + currentTasksDone + ", tableName=" + tableName);
", currentTasksDone=" + currentTasksDone + ", retries=" + retriesCnt.get() +
" hasError=" + hasError() + ", tableName=" + tableName);
}
waitForNextTaskDone(currentTasksDone);
currentTasksDone = this.tasksDone.get();
@ -848,10 +888,6 @@ class AsyncProcess<CResult> {
* @return the ServerErrorTracker to use for this call (with this change, one is always created)
*/
protected HConnectionManager.ServerErrorTracker createServerErrorTracker() {
if (useServerTrackerForRetries){
return new HConnectionManager.ServerErrorTracker(this.serverTrackerTimeout);
}else {
return null;
}
return new HConnectionManager.ServerErrorTracker(this.serverTrackerTimeout, this.numTries);
}
}

View File

@ -24,6 +24,7 @@ import java.lang.reflect.Constructor;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.SocketException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
@ -2638,15 +2639,23 @@ public class HConnectionManager {
// We need a concurrent map here, as we could have multiple threads updating it in parallel.
private final ConcurrentMap<HRegionLocation, ServerErrors> errorsByServer =
new ConcurrentHashMap<HRegionLocation, ServerErrors>();
private long canRetryUntil = 0;
private final long canRetryUntil;
private final int maxRetries;
private final String startTrackingTime;
public ServerErrorTracker(long timeout) {
LOG.trace("Server tracker timeout is " + timeout + "ms");
public ServerErrorTracker(long timeout, int maxRetries) {
this.maxRetries = maxRetries;
this.canRetryUntil = EnvironmentEdgeManager.currentTimeMillis() + timeout;
this.startTrackingTime = new Date().toString();
}
boolean canRetryMore() {
return EnvironmentEdgeManager.currentTimeMillis() < this.canRetryUntil;
/**
* We stop retrying when we have exhausted BOTH the number of retries and the time allocated.
*/
boolean canRetryMore(int numRetry) {
// If there is a single try we must not take into account the time.
return numRetry < maxRetries || (maxRetries > 1 &&
EnvironmentEdgeManager.currentTimeMillis() < this.canRetryUntil);
}
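To make the new retry-budget rule concrete: retries continue while either budget has room left, and a single-try configuration ignores the time budget entirely. A standalone sketch of the same decision, with the tracker's fields passed in as plain parameters:

```java
public class RetryBudgetSketch {
  static boolean canRetryMore(int numRetry, int maxRetries, long now, long canRetryUntil) {
    // Retries continue while EITHER budget is left; with a single try, time is ignored.
    return numRetry < maxRetries || (maxRetries > 1 && now < canRetryUntil);
  }

  public static void main(String[] args) {
    long start = 0, timeout = 10_000, deadline = start + timeout;
    // Retry count exhausted but time remains: still allowed to retry.
    System.out.println(canRetryMore(5, 5, start + 1_000, deadline)); // true
    // Both the retry count and the time budget exhausted: stop.
    System.out.println(canRetryMore(5, 5, deadline + 1, deadline));  // false
    // Single-try configuration: the time budget is never consulted.
    System.out.println(canRetryMore(1, 1, start + 1, deadline));     // false
  }
}
```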
/**
@ -2657,20 +2666,12 @@ public class HConnectionManager {
* @return The time to wait before sending next request.
*/
long calculateBackoffTime(HRegionLocation server, long basePause) {
long result = 0;
long result;
ServerErrors errorStats = errorsByServer.get(server);
if (errorStats != null) {
result = ConnectionUtils.getPauseTime(basePause, errorStats.retries);
// Adjust by the time we already waited since last talking to this server.
long now = EnvironmentEdgeManager.currentTimeMillis();
long timeSinceLastError = now - errorStats.getLastErrorTime();
if (timeSinceLastError > 0) {
result = Math.max(0, result - timeSinceLastError);
}
// Finally, see if the backoff time overshoots the timeout.
if (result > 0 && (now + result > this.canRetryUntil)) {
result = Math.max(0, this.canRetryUntil - now);
}
result = ConnectionUtils.getPauseTime(basePause, errorStats.retries.get());
} else {
result = 0; // yes, if the server is not in our list we don't wait before retrying.
}
return result;
}
@ -2685,29 +2686,25 @@ public class HConnectionManager {
if (errors != null) {
errors.addError();
} else {
errorsByServer.put(server, new ServerErrors());
errors = errorsByServer.putIfAbsent(server, new ServerErrors());
if (errors != null){
errors.addError();
}
}
}
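reportServerError now registers a server's error record with putIfAbsent, so two threads reporting the first error for the same server cannot silently overwrite each other's counter. The self-contained sketch below shows the same race-free pattern with a string key and a plain AtomicInteger; it always increments, which is slightly simpler than the patch (where the very first error leaves the counter at zero), but the registration logic is the point:

```java
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

public class ServerErrorCountSketch {
  private final ConcurrentMap<String, AtomicInteger> errorsByServer =
      new ConcurrentHashMap<String, AtomicInteger>();

  // Record one error for the given server without losing counts under contention:
  // putIfAbsent returns the existing counter if another thread won the race.
  void reportServerError(String server) {
    AtomicInteger errors = errorsByServer.get(server);
    if (errors == null) {
      AtomicInteger created = new AtomicInteger(0);
      errors = errorsByServer.putIfAbsent(server, created);
      if (errors == null) {
        errors = created;
      }
    }
    errors.incrementAndGet();
  }

  int errorCount(String server) {
    AtomicInteger errors = errorsByServer.get(server);
    return errors == null ? 0 : errors.get();
  }

  public static void main(String[] args) throws InterruptedException {
    final ServerErrorCountSketch sketch = new ServerErrorCountSketch();
    Thread[] threads = new Thread[4];
    for (int i = 0; i < threads.length; i++) {
      threads[i] = new Thread(() -> {
        for (int j = 0; j < 1000; j++) {
          sketch.reportServerError("rs-1");
        }
      });
      threads[i].start();
    }
    for (Thread t : threads) {
      t.join();
    }
    System.out.println(sketch.errorCount("rs-1")); // 4000: no increments lost
  }
}
```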
String getStartTrackingTime() {
return startTrackingTime;
}
/**
* The record of errors for a server.
*/
private static class ServerErrors {
public long lastErrorTime;
public int retries;
public ServerErrors() {
this.lastErrorTime = EnvironmentEdgeManager.currentTimeMillis();
this.retries = 0;
}
public final AtomicInteger retries = new AtomicInteger(0);
public void addError() {
this.lastErrorTime = EnvironmentEdgeManager.currentTimeMillis();
++this.retries;
}
public long getLastErrorTime() {
return this.lastErrorTime;
retries.incrementAndGet();
}
}
}

View File

@ -679,7 +679,7 @@ public class TestAsyncProcess {
HTable ht = new HTable();
Configuration configuration = new Configuration(conf);
configuration.setBoolean(HConnectionManager.RETRIES_BY_SERVER_KEY, true);
configuration.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 20);
configuration.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
// set default writeBufferSize
ht.setWriteBufferSize(configuration.getLong("hbase.client.write.buffer", 2097152));
@ -688,24 +688,21 @@ public class TestAsyncProcess {
ht.ap = new MyAsyncProcess<Object>(mci, null, configuration);
Assert.assertTrue(ht.ap.useServerTrackerForRetries);
Assert.assertNotNull(ht.ap.createServerErrorTracker());
Assert.assertTrue(ht.ap.serverTrackerTimeout > 10000);
Assert.assertTrue(ht.ap.serverTrackerTimeout > 200);
ht.ap.serverTrackerTimeout = 1;
Put p = createPut(1, false);
ht.setAutoFlush(false, false);
ht.put(p);
long start = System.currentTimeMillis();
try {
ht.flushCommits();
Assert.fail();
} catch (RetriesExhaustedWithDetailsException expected) {
}
// Checking that the ErrorsServers came into play and made us stop immediately
Assert.assertTrue((System.currentTimeMillis() - start) < 10000);
// Checking that the ErrorsServers came into play and didn't make us stop immediately
Assert.assertEquals(ht.ap.tasksSent.get(), 3);
}
/**
@ -731,8 +728,7 @@ public class TestAsyncProcess {
MyConnectionImpl2 con = new MyConnectionImpl2(hrls);
ht.connection = con;
ht.batch(gets);
ht.batch(gets);
Assert.assertEquals(con.ap.nbActions.get(), NB_REGS);
Assert.assertEquals("1 multi response per server", 2, con.ap.nbMultiResponse.get());

View File

@ -512,9 +512,10 @@ public final class HConstants {
* run out of array items. Retries beyond this use the last number in the array. So, for
* example, if hbase.client.pause is 1 second, and maximum retries count
* hbase.client.retries.number is 10, we will retry at the following intervals:
* 1, 2, 3, 10, 100, 100, 100, 100, 100, 100.
* 1, 2, 3, 5, 10, 20, 40, 100, 100, 100.
* With the default 100ms pause, a back-off multiplier of 200 means a 20s wait before the next retry.
*/
public static int RETRY_BACKOFF[] = { 1, 2, 3, 5, 10, 100 };
public static int RETRY_BACKOFF[] = { 1, 2, 3, 5, 10, 20, 40, 100, 100, 100, 100, 200, 200 };
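To make the arithmetic in the comment concrete: each entry is a multiplier on the client pause, so with a 100 ms pause the final entries (200) translate to 20 s waits, and running through the full 13-entry table adds up to roughly 88 s of sleeping (ignoring any jitter). A quick check:

```java
public class BackoffTableCheck {
  public static void main(String[] args) {
    int[] retryBackoff = {1, 2, 3, 5, 10, 20, 40, 100, 100, 100, 100, 200, 200};
    long pauseMs = 100; // HConstants.DEFAULT_HBASE_CLIENT_PAUSE
    long total = 0;
    for (int multiplier : retryBackoff) {
      total += pauseMs * multiplier;
    }
    System.out.println("last wait  = " + pauseMs * retryBackoff[retryBackoff.length - 1] + " ms"); // 20000
    System.out.println("total wait = " + total + " ms");                                           // 88100
  }
}
```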
public static final String REGION_IMPL = "hbase.hregion.impl";
@ -591,7 +592,7 @@ public final class HConstants {
/**
* Default value of {@link #HBASE_CLIENT_MAX_PERSERVER_TASKS}.
*/
public static final int DEFAULT_HBASE_CLIENT_MAX_PERSERVER_TASKS = 5;
public static final int DEFAULT_HBASE_CLIENT_MAX_PERSERVER_TASKS = 2;
/**
* The maximum number of concurrent connections the client will maintain to a single

View File

@ -93,13 +93,13 @@ public class ClusterStatusPublisher extends Chore {
* We want to limit the size of the protobuf message sent, so that it fits into a single packet.
* A reasonable size for IP / Ethernet is less than 1Kb.
*/
public static int MAX_SERVER_PER_MESSAGE = 10;
public final static int MAX_SERVER_PER_MESSAGE = 10;
/**
* If a server dies, we're sending the information multiple times in case a receiver misses the
* message.
*/
public static int NB_SEND = 5;
public final static int NB_SEND = 5;
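These constants bound the publisher's traffic: at most MAX_SERVER_PER_MESSAGE dead servers per status message, and each death is repeated across NB_SEND messages so that one dropped packet is not fatal. The sketch below is only a hypothetical illustration of that chunk-and-repeat idea, not the actual Chore logic:

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DeadServerPublishSketch {
  static final int MAX_SERVER_PER_MESSAGE = 10;
  static final int NB_SEND = 5;

  // How many times each dead server has already been included in a message.
  private final Map<String, Integer> timesSent = new HashMap<String, Integer>();

  // Pick at most MAX_SERVER_PER_MESSAGE servers that still need to be announced.
  List<String> nextMessage(List<String> deadServers) {
    List<String> message = new ArrayList<String>();
    for (String server : deadServers) {
      int sent = timesSent.getOrDefault(server, 0);
      if (sent < NB_SEND && message.size() < MAX_SERVER_PER_MESSAGE) {
        message.add(server);
        timesSent.put(server, sent + 1);
      }
    }
    return message;
  }

  public static void main(String[] args) {
    DeadServerPublishSketch sketch = new DeadServerPublishSketch();
    List<String> dead = new ArrayList<String>();
    for (int i = 0; i < 12; i++) {
      dead.add("rs-" + i);
    }
    // First run: only 10 of the 12 dead servers fit into one message.
    System.out.println(sketch.nextMessage(dead).size()); // 10
    // Each server keeps being re-announced until it has been sent NB_SEND times.
  }
}
```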
public ClusterStatusPublisher(HMaster master, Configuration conf,
Class<? extends Publisher> publisherClass)

View File

@ -2534,7 +2534,13 @@ public class HRegion implements HeapSize { // , Writable{
if (this.memstoreSize.get() > this.blockingMemStoreSize) {
requestFlush();
throw new RegionTooBusyException("above memstore limit");
throw new RegionTooBusyException("Above memstore limit, " +
"regionName=" + (this.getRegionInfo() == null ? "unknown" :
this.getRegionInfo().getRegionNameAsString()) +
", server=" + (this.getRegionServerServices() == null ? "unknown" :
this.getRegionServerServices().getServerName()) +
", memstoreSize=" + memstoreSize.get() +
", blockingMemStoreSize=" + blockingMemStoreSize);
}
}
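For context on the check above: the region blocks updates once its memstore grows past a blocking threshold derived from the flush size. The sketch below only illustrates that relationship; the config names in the comments (hbase.hregion.memstore.flush.size, hbase.hregion.memstore.block.multiplier) and the values used are assumptions for illustration, not the exact code path:

```java
public class MemStorePressureSketch {
  public static void main(String[] args) {
    // Illustrative values; in HBase these would come from hbase.hregion.memstore.flush.size
    // and hbase.hregion.memstore.block.multiplier.
    long memstoreFlushSize = 128L * 1024 * 1024; // 128 MB
    long blockMultiplier = 2;
    long blockingMemStoreSize = memstoreFlushSize * blockMultiplier;

    long memstoreSize = 300L * 1024 * 1024; // current in-memory data for the region
    if (memstoreSize > blockingMemStoreSize) {
      // Mirrors the richer message added by this patch: say which region/server/sizes.
      System.out.println("Above memstore limit, regionName=demo-region, server=demo-server"
          + ", memstoreSize=" + memstoreSize
          + ", blockingMemStoreSize=" + blockingMemStoreSize);
    }
  }
}
```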
@ -5355,10 +5361,14 @@ public class HRegion implements HeapSize { // , Writable{
throws RegionTooBusyException, InterruptedIOException {
try {
final long waitTime = Math.min(maxBusyWaitDuration,
busyWaitDuration * Math.min(multiplier, maxBusyWaitMultiplier));
busyWaitDuration * Math.min(multiplier, maxBusyWaitMultiplier));
if (!lock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
throw new RegionTooBusyException(
"failed to get a lock in " + waitTime + "ms");
"failed to get a lock in " + waitTime + " ms. " +
"regionName=" + (this.getRegionInfo() == null ? "unknown" :
this.getRegionInfo().getRegionNameAsString()) +
", server=" + (this.getRegionServerServices() == null ? "unknown" :
this.getRegionServerServices().getServerName()));
}
} catch (InterruptedException ie) {
LOG.info("Interrupted while waiting for a lock");

View File

@ -870,7 +870,7 @@ public class TestHCM {
long timeBase = timeMachine.currentTimeMillis();
long largeAmountOfTime = ANY_PAUSE * 1000;
HConnectionManager.ServerErrorTracker tracker =
new HConnectionManager.ServerErrorTracker(largeAmountOfTime);
new HConnectionManager.ServerErrorTracker(largeAmountOfTime, 100);
// The default backoff is 0.
assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
@ -912,11 +912,11 @@ public class TestHCM {
// We also should not go over the boundary; last retry would be on it.
long timeLeft = (long)(ANY_PAUSE * 0.5);
timeMachine.setValue(timeBase + largeAmountOfTime - timeLeft);
assertTrue(tracker.canRetryMore());
assertTrue(tracker.canRetryMore(1));
tracker.reportServerError(location);
assertEquals(timeLeft, tracker.calculateBackoffTime(location, ANY_PAUSE));
timeMachine.setValue(timeBase + largeAmountOfTime);
assertFalse(tracker.canRetryMore());
assertFalse(tracker.canRetryMore(1));
} finally {
EnvironmentEdgeManager.reset();
}