HBASE-8657 Miscellaneous log fixups for hbase-it; tidier logging, fix a few NPEs

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1487945 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2013-05-30 17:31:15 +00:00
parent 957f580d83
commit c920eb3c4c
9 changed files with 41 additions and 34 deletions

View File

@ -93,9 +93,8 @@ public class ClientScanner extends AbstractClientScanner {
public ClientScanner(final Configuration conf, final Scan scan, public ClientScanner(final Configuration conf, final Scan scan,
final byte[] tableName, HConnection connection) throws IOException { final byte[] tableName, HConnection connection) throws IOException {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Creating scanner over " LOG.debug("Scan table=" + Bytes.toString(tableName)
+ Bytes.toString(tableName) + ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
+ " starting at key '" + Bytes.toStringBinary(scan.getStartRow()) + "'");
} }
this.scan = scan; this.scan = scan;
this.tableName = tableName; this.tableName = tableName;
@ -192,7 +191,7 @@ public class ClientScanner extends AbstractClientScanner {
done) { done) {
close(); close();
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Finished scanning region " + this.currentRegion); LOG.debug("Finished region=" + this.currentRegion);
} }
return false; return false;
} }

View File

@ -1188,14 +1188,18 @@ public class HConnectionManager {
} }
} }
if (isNewCacheEntry) { if (isNewCacheEntry) {
LOG.debug("Cached location for " + if (LOG.isTraceEnabled()) {
LOG.trace("Cached location for " +
location.getRegionInfo().getRegionNameAsString() + location.getRegionInfo().getRegionNameAsString() +
" is " + location.getHostnamePort()); " is " + location.getHostnamePort());
}
} else if (isStaleUpdate && !location.equals(oldLocation)) { } else if (isStaleUpdate && !location.equals(oldLocation)) {
LOG.debug("Ignoring stale location update for " if (LOG.isTraceEnabled()) {
+ location.getRegionInfo().getRegionNameAsString() + ": " LOG.trace("Ignoring stale location update for "
+ location.getHostnamePort() + " at " + location.getSeqNum() + "; local " + location.getRegionInfo().getRegionNameAsString() + ": "
+ oldLocation.getHostnamePort() + " at " + oldLocation.getSeqNum()); + location.getHostnamePort() + " at " + location.getSeqNum() + "; local "
+ oldLocation.getHostnamePort() + " at " + oldLocation.getSeqNum());
}
} }
} }
@ -1388,7 +1392,7 @@ public class HConnectionManager {
// tries at this point is 1 or more; decrement to start from 0. // tries at this point is 1 or more; decrement to start from 0.
long pauseTime = ConnectionUtils.getPauseTime(pause, tries - 1); long pauseTime = ConnectionUtils.getPauseTime(pause, tries - 1);
LOG.info("getMaster attempt " + tries + " of " + numTries + LOG.info("getMaster attempt " + tries + " of " + numTries +
" failed; retrying after sleep of " +pauseTime + ", exception=" + " failed; retrying after sleep of " + pauseTime + ", exception=" +
exceptionCaught); exceptionCaught);
try { try {
@ -2217,10 +2221,11 @@ public class HConnectionManager {
if (LOG.isTraceEnabled() && isRetry) { if (LOG.isTraceEnabled() && isRetry) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (Action<R> action : e.getValue().allActions()) { for (Action<R> action : e.getValue().allActions()) {
sb.append(Bytes.toStringBinary(action.getAction().getRow())).append(';'); if (sb.length() > 0) sb.append(' ');
sb.append(Bytes.toStringBinary(action.getAction().getRow()));
} }
LOG.trace("Will retry requests to [" + e.getKey().getHostnamePort() LOG.trace("Attempt #" + this.curNumRetries + " against " + e.getKey().getHostnamePort()
+ "] after delay of [" + backoffTime + "] for rows [" + sb.toString() + "]"); + " after=" + backoffTime + "ms, row(s)=" + sb.toString());
} }
Triple<MultiAction<R>, HRegionLocation, Future<MultiResponse>> p = Triple<MultiAction<R>, HRegionLocation, Future<MultiResponse>> p =
new Triple<MultiAction<R>, HRegionLocation, Future<MultiResponse>>( new Triple<MultiAction<R>, HRegionLocation, Future<MultiResponse>>(
@ -2280,11 +2285,10 @@ public class HConnectionManager {
// we had more than numRetries for any action // we had more than numRetries for any action
// In this case, we will finish the current retries but we won't start new ones. // In this case, we will finish the current retries but we won't start new ones.
boolean lastRetry = false; boolean lastRetry = false;
// despite its name numRetries means number of tries. So if numRetries == 1 it means we // If hci.numTries is 1 or 0, we do not retry.
// won't retry. And we compare vs. 2 in case someone set it to zero.
boolean noRetry = (hci.numTries < 2); boolean noRetry = (hci.numTries < 2);
// Analyze and resubmit until all actions are done successfully or failed after numRetries // Analyze and resubmit until all actions are done successfully or failed after numTries
while (!this.inProgress.isEmpty()) { while (!this.inProgress.isEmpty()) {
// We need the original multi action to find out what actions to replay if // We need the original multi action to find out what actions to replay if
// we have a 'total' failure of the Future<MultiResponse> // we have a 'total' failure of the Future<MultiResponse>
@ -2355,8 +2359,9 @@ public class HConnectionManager {
// Retry all actions in toReplay then clear it. // Retry all actions in toReplay then clear it.
if (!noRetry && !toReplay.isEmpty()) { if (!noRetry && !toReplay.isEmpty()) {
if (isTraceEnabled) { if (isTraceEnabled) {
LOG.trace("Retrying due to errors" + (lastRetry ? " (one last time)" : "") LOG.trace("Retrying #" + this.curNumRetries +
+ ": " + retriedErrors.getDescriptionAndClear()); (lastRetry ? " (one last time)": "") + " because " +
retriedErrors.getDescriptionAndClear());
} }
doRetry(); doRetry();
if (lastRetry) { if (lastRetry) {

View File

@ -156,8 +156,8 @@ public class MetaScanner {
int rows = Math.min(rowLimit, configuration.getInt(HConstants.HBASE_META_SCANNER_CACHING, int rows = Math.min(rowLimit, configuration.getInt(HConstants.HBASE_META_SCANNER_CACHING,
HConstants.DEFAULT_HBASE_META_SCANNER_CACHING)); HConstants.DEFAULT_HBASE_META_SCANNER_CACHING));
scan.setCaching(rows); scan.setCaching(rows);
if (LOG.isDebugEnabled()) { if (LOG.isTraceEnabled()) {
LOG.debug("Scanning " + Bytes.toString(metaTableName) + " starting at row=" + LOG.trace("Scanning " + Bytes.toString(metaTableName) + " starting at row=" +
Bytes.toStringBinary(startRow) + " for max=" + rowUpperLimit + " with caching=" + rows); Bytes.toStringBinary(startRow) + " for max=" + rowUpperLimit + " with caching=" + rows);
} }
// Run the scan // Run the scan

View File

@ -126,13 +126,14 @@ extends RetriesExhaustedException {
Throwable t = this.exceptions.get(i); Throwable t = this.exceptions.get(i);
Row action = this.actions.get(i); Row action = this.actions.get(i);
String server = this.hostnameAndPort.get(i); String server = this.hostnameAndPort.get(i);
pw.append("Error"); pw.append("exception");
if (this.exceptions.size() > 1) { if (this.exceptions.size() > 1) {
pw.append(" #" + i); pw.append(" #" + i);
} }
pw.append(" from [" + server + "] for [" pw.append(" from " + server + " for "
+ ((action == null) ? "unknown key" : Bytes.toStringBinary(action.getRow())) + "]"); + ((action == null) ? "unknown key" : Bytes.toStringBinary(action.getRow())));
if (t != null) { if (t != null) {
pw.println();
t.printStackTrace(pw); t.printStackTrace(pw);
} }
} }

View File

@ -94,6 +94,10 @@ public abstract class ServerCallable<T> implements Callable<T> {
*/ */
public void prepare(final boolean reload) throws IOException { public void prepare(final boolean reload) throws IOException {
this.location = connection.getRegionLocation(tableName, row, reload); this.location = connection.getRegionLocation(tableName, row, reload);
if (this.location == null) {
throw new IOException("Failed to find location, tableName=" + tableName + ", row=" +
Bytes.toString(row) + ", reload=" + reload);
}
this.stub = connection.getClient(location.getServerName()); this.stub = connection.getClient(location.getServerName());
} }
@ -169,7 +173,7 @@ public abstract class ServerCallable<T> implements Callable<T> {
prepare(tries != 0); // if called with false, check table status on ZK prepare(tries != 0); // if called with false, check table status on ZK
return call(); return call();
} catch (Throwable t) { } catch (Throwable t) {
LOG.warn("Call exception, tries=" + tries + ", numRetries=" + numRetries + ": " + t); LOG.warn("Call exception, tries=" + tries + ", numRetries=" + numRetries, t);
t = translateException(t); t = translateException(t);
// translateException throws an exception when we should not retry, i.e. when it's the // translateException throws an exception when we should not retry, i.e. when it's the

View File

@ -574,7 +574,7 @@ public final class HConstants {
/** /**
* Default value of {@link #HBASE_CLIENT_RETRIES_NUMBER}. * Default value of {@link #HBASE_CLIENT_RETRIES_NUMBER}.
*/ */
public static int DEFAULT_HBASE_CLIENT_RETRIES_NUMBER = 10; public static int DEFAULT_HBASE_CLIENT_RETRIES_NUMBER = 20;
/** /**
* Parameter name for client prefetch limit, used as the maximum number of regions * Parameter name for client prefetch limit, used as the maximum number of regions

View File

@ -37,7 +37,7 @@ import org.junit.experimental.categories.Category;
@Category(IntegrationTests.class) @Category(IntegrationTests.class)
public class IntegrationTestDataIngestSlowDeterministic extends IngestIntegrationTestBase { public class IntegrationTestDataIngestSlowDeterministic extends IngestIntegrationTestBase {
private static final int SERVER_COUNT = 4; // number of slaves for the smallest cluster private static final int SERVER_COUNT = 4; // number of slaves for the smallest cluster
private static final long DEFAULT_RUN_TIME = 30 * 60 * 1000; private static final long DEFAULT_RUN_TIME = 10 * 60 * 1000;
private static final long CHAOS_EVERY_MS = 150 * 1000; // Chaos every 2.5 minutes. private static final long CHAOS_EVERY_MS = 150 * 1000; // Chaos every 2.5 minutes.
private ChaosMonkey monkey; private ChaosMonkey monkey;

View File

@ -22,13 +22,11 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Queue; import java.util.Queue;
import java.util.Random; import java.util.Random;
import java.util.Set;
import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -38,8 +36,8 @@ import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseCluster; import org.apache.hadoop.hbase.HBaseCluster;
import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey; import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.Stoppable;
@ -49,7 +47,6 @@ import org.apache.hadoop.util.ToolRunner;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.google.protobuf.ServiceException;
/** /**
* A utility to inject faults in a running cluster. * A utility to inject faults in a running cluster.
@ -158,6 +155,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
/** Returns current region servers */ /** Returns current region servers */
protected ServerName[] getCurrentServers() throws IOException { protected ServerName[] getCurrentServers() throws IOException {
Collection<ServerName> regionServers = cluster.getClusterStatus().getServers(); Collection<ServerName> regionServers = cluster.getClusterStatus().getServers();
if (regionServers == null || regionServers.size() <= 0) return new ServerName [] {};
return regionServers.toArray(new ServerName[regionServers.size()]); return regionServers.toArray(new ServerName[regionServers.size()]);
} }

View File

@ -173,8 +173,8 @@ public class MultiThreadedWriter extends MultiThreadedAction {
} }
public void insert(HTable table, Put put, long keyBase) { public void insert(HTable table, Put put, long keyBase) {
long start = System.currentTimeMillis();
try { try {
long start = System.currentTimeMillis();
table.put(put); table.put(put);
totalOpTimeMs.addAndGet(System.currentTimeMillis() - start); totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
} catch (IOException e) { } catch (IOException e) {
@ -190,8 +190,8 @@ public class MultiThreadedWriter extends MultiThreadedAction {
pw.flush(); pw.flush();
exceptionInfo = StringUtils.stringifyException(e); exceptionInfo = StringUtils.stringifyException(e);
} }
LOG.error("Failed to insert: " + keyBase + "; region information: " LOG.error("Failed to insert: " + keyBase + " after " + (System.currentTimeMillis() - start) +
+ getRegionDebugInfoSafe(table, put.getRow()) + "; errors: " "ms; region information: " + getRegionDebugInfoSafe(table, put.getRow()) + "; errors: "
+ exceptionInfo); + exceptionInfo);
} }
} }