From c920eb3c4c3fba8a52a6be8794e63d865a38196b Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Thu, 30 May 2013 17:31:15 +0000
Subject: [PATCH] HBASE-8657 Miscellaneous log fixups for hbase-it; tidier logging, fix a few NPEs

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1487945 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hadoop/hbase/client/ClientScanner.java    |  7 ++--
 .../hbase/client/HConnectionManager.java      | 33 +++++++++++--------
 .../hadoop/hbase/client/MetaScanner.java      |  4 +--
 .../RetriesExhaustedWithDetailsException.java |  7 ++--
 .../hadoop/hbase/client/ServerCallable.java   |  6 +++-
 .../org/apache/hadoop/hbase/HConstants.java   |  2 +-
 ...rationTestDataIngestSlowDeterministic.java |  4 +--
 .../apache/hadoop/hbase/util/ChaosMonkey.java |  6 ++--
 .../hbase/util/MultiThreadedWriter.java       |  6 ++--
 9 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java
index 581e424dc25..ebab79921ae 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java
@@ -93,9 +93,8 @@ public class ClientScanner extends AbstractClientScanner {
   public ClientScanner(final Configuration conf, final Scan scan, final byte[] tableName,
       HConnection connection) throws IOException {
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Creating scanner over "
-          + Bytes.toString(tableName)
-          + " starting at key '" + Bytes.toStringBinary(scan.getStartRow()) + "'");
+      LOG.debug("Scan table=" + Bytes.toString(tableName)
+          + ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
     }
     this.scan = scan;
     this.tableName = tableName;
@@ -192,7 +191,7 @@ public class ClientScanner extends AbstractClientScanner {
             done) {
           close();
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Finished scanning region " + this.currentRegion);
+            LOG.debug("Finished region=" + this.currentRegion);
           }
           return false;
         }
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
index 3dd65a73b62..35d44dac9af 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
@@ -1188,14 +1188,18 @@ public class HConnectionManager {
       }
     }
     if (isNewCacheEntry) {
-      LOG.debug("Cached location for " +
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Cached location for " +
           location.getRegionInfo().getRegionNameAsString() +
           " is " + location.getHostnamePort());
+      }
     } else if (isStaleUpdate && !location.equals(oldLocation)) {
-      LOG.debug("Ignoring stale location update for "
-        + location.getRegionInfo().getRegionNameAsString() + ": "
-        + location.getHostnamePort() + " at " + location.getSeqNum() + "; local "
-        + oldLocation.getHostnamePort() + " at " + oldLocation.getSeqNum());
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Ignoring stale location update for "
+          + location.getRegionInfo().getRegionNameAsString() + ": "
+          + location.getHostnamePort() + " at " + location.getSeqNum() + "; local "
+          + oldLocation.getHostnamePort() + " at " + oldLocation.getSeqNum());
+      }
     }
   }

@@ -1388,7 +1392,7 @@ public class HConnectionManager {
           // tries at this point is 1 or more; decrement to start from 0.
           long pauseTime = ConnectionUtils.getPauseTime(pause, tries - 1);
           LOG.info("getMaster attempt " + tries + " of " + numTries +
-            " failed; retrying after sleep of " +pauseTime + ", exception=" +
+            " failed; retrying after sleep of " + pauseTime + ", exception=" +
             exceptionCaught);

           try {
@@ -2217,10 +2221,11 @@ public class HConnectionManager {
         if (LOG.isTraceEnabled() && isRetry) {
           StringBuilder sb = new StringBuilder();
           for (Action<R> action : e.getValue().allActions()) {
-            sb.append(Bytes.toStringBinary(action.getAction().getRow())).append(';');
+            if (sb.length() > 0) sb.append(' ');
+            sb.append(Bytes.toStringBinary(action.getAction().getRow()));
           }
-          LOG.trace("Will retry requests to [" + e.getKey().getHostnamePort()
-              + "] after delay of [" + backoffTime + "] for rows [" + sb.toString() + "]");
+          LOG.trace("Attempt #" + this.curNumRetries + " against " + e.getKey().getHostnamePort()
+              + " after=" + backoffTime + "ms, row(s)=" + sb.toString());
         }
         Triple<MultiAction<R>, HRegionLocation, Future<MultiResponse>> p =
             new Triple<MultiAction<R>, HRegionLocation, Future<MultiResponse>>(
@@ -2280,11 +2285,10 @@ public class HConnectionManager {
         // we had more than numRetries for any action
         // In this case, we will finish the current retries but we won't start new ones.
         boolean lastRetry = false;
-        // despite its name numRetries means number of tries. So if numRetries == 1 it means we
-        // won't retry. And we compare vs. 2 in case someone set it to zero.
+        // If hci.numTries is 1 or 0, we do not retry.
        boolean noRetry = (hci.numTries < 2);

-        // Analyze and resubmit until all actions are done successfully or failed after numRetries
+        // Analyze and resubmit until all actions are done successfully or failed after numTries
        while (!this.inProgress.isEmpty()) {
          // We need the original multi action to find out what actions to replay if
          // we have a 'total' failure of the Future
@@ -2355,8 +2359,9 @@ public class HConnectionManager {
         // Retry all actions in toReplay then clear it.
         if (!noRetry && !toReplay.isEmpty()) {
           if (isTraceEnabled) {
-            LOG.trace("Retrying due to errors" + (lastRetry ? " (one last time)" : "")
-                + ": " + retriedErrors.getDescriptionAndClear());
+            LOG.trace("Retrying #" + this.curNumRetries +
+                (lastRetry ? " (one last time)": "") + " because " +
+                retriedErrors.getDescriptionAndClear());
           }
           doRetry();
           if (lastRetry) {
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java
index 4b5d8f1834d..bdcf32638cb 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java
@@ -156,8 +156,8 @@ public class MetaScanner {
       int rows = Math.min(rowLimit, configuration.getInt(HConstants.HBASE_META_SCANNER_CACHING,
           HConstants.DEFAULT_HBASE_META_SCANNER_CACHING));
       scan.setCaching(rows);
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Scanning " + Bytes.toString(metaTableName) + " starting at row=" +
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Scanning " + Bytes.toString(metaTableName) + " starting at row=" +
           Bytes.toStringBinary(startRow) + " for max=" + rowUpperLimit + " with caching=" + rows);
       }
       // Run the scan
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java
index 005b3990bc9..a1e22858d49 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java
@@ -126,13 +126,14 @@ extends RetriesExhaustedException {
       Throwable t = this.exceptions.get(i);
       Row action = this.actions.get(i);
       String server = this.hostnameAndPort.get(i);
-      pw.append("Error");
+      pw.append("exception");
       if (this.exceptions.size() > 1) {
         pw.append(" #" + i);
       }
-      pw.append(" from [" + server + "] for ["
-        + ((action == null) ? "unknown key" : Bytes.toStringBinary(action.getRow())) + "]");
+      pw.append(" from " + server + " for "
+        + ((action == null) ? "unknown key" : Bytes.toStringBinary(action.getRow())));
       if (t != null) {
+        pw.println();
         t.printStackTrace(pw);
       }
     }
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java
index dbd2d10125d..d98e461d4d2 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java
@@ -94,6 +94,10 @@ public abstract class ServerCallable<T> implements Callable<T> {
    */
   public void prepare(final boolean reload) throws IOException {
     this.location = connection.getRegionLocation(tableName, row, reload);
+    if (this.location == null) {
+      throw new IOException("Failed to find location, tableName=" + tableName + ", row=" +
+        Bytes.toString(row) + ", reload=" + reload);
+    }
     this.stub = connection.getClient(location.getServerName());
   }

@@ -169,7 +173,7 @@ public abstract class ServerCallable<T> implements Callable<T> {
         prepare(tries != 0); // if called with false, check table status on ZK
         return call();
       } catch (Throwable t) {
-        LOG.warn("Call exception, tries=" + tries + ", numRetries=" + numRetries + ": " + t);
+        LOG.warn("Call exception, tries=" + tries + ", numRetries=" + numRetries, t);
         t = translateException(t);
         // translateException throws an exception when we should not retry, i.e. when it's the
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 6af47d7abfc..e18d9da64a1 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -574,7 +574,7 @@ public final class HConstants {
   /**
    * Default value of {@link #HBASE_CLIENT_RETRIES_NUMBER}.
    */
-  public static int DEFAULT_HBASE_CLIENT_RETRIES_NUMBER = 10;
+  public static int DEFAULT_HBASE_CLIENT_RETRIES_NUMBER = 20;

   /**
    * Parameter name for client prefetch limit, used as the maximum number of regions
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java
index a5224ff678c..9d91a58a9e4 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java
@@ -31,13 +31,13 @@ import org.junit.experimental.categories.Category;
 /**
  * A system test which does large data ingestion and verify using {@link LoadTestTool}.
  * It performs a set of actions deterministically using ChaosMonkey, then starts killing
- * things randomly. You can configure how long should the load test run by using
+ * things randomly. You can configure how long the load test should run by using the
  * "hbase.IntegrationTestDataIngestSlowDeterministic.runtime" configuration parameter.
  */
 @Category(IntegrationTests.class)
 public class IntegrationTestDataIngestSlowDeterministic extends IngestIntegrationTestBase {
   private static final int SERVER_COUNT = 4; // number of slaves for the smallest cluster
-  private static final long DEFAULT_RUN_TIME = 30 * 60 * 1000;
+  private static final long DEFAULT_RUN_TIME = 10 * 60 * 1000;
   private static final long CHAOS_EVERY_MS = 150 * 1000; // Chaos every 2.5 minutes.

   private ChaosMonkey monkey;
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java
index 06896536914..aa252ee0a62 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java
@@ -22,13 +22,11 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Queue;
 import java.util.Random;
-import java.util.Set;

 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.logging.Log;
@@ -38,8 +36,8 @@ import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.HBaseCluster;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.IntegrationTestingUtility;
 import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey;
+import org.apache.hadoop.hbase.IntegrationTestingUtility;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.Stoppable;
@@ -49,7 +47,6 @@ import org.apache.hadoop.util.ToolRunner;

 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.protobuf.ServiceException;

 /**
  * A utility to injects faults in a running cluster.
@@ -158,6 +155,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
   /** Returns current region servers */
   protected ServerName[] getCurrentServers() throws IOException {
     Collection<ServerName> regionServers = cluster.getClusterStatus().getServers();
+    if (regionServers == null || regionServers.size() <= 0) return new ServerName [] {};
     return regionServers.toArray(new ServerName[regionServers.size()]);
   }

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java
index 232c79789c8..41ba1262641 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java
@@ -173,8 +173,8 @@ public class MultiThreadedWriter extends MultiThreadedAction {
   }

   public void insert(HTable table, Put put, long keyBase) {
+    long start = System.currentTimeMillis();
     try {
-      long start = System.currentTimeMillis();
       table.put(put);
       totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
     } catch (IOException e) {
@@ -190,8 +190,8 @@ public class MultiThreadedWriter extends MultiThreadedAction {
         pw.flush();
         exceptionInfo = StringUtils.stringifyException(e);
       }
-      LOG.error("Failed to insert: " + keyBase + "; region information: "
-          + getRegionDebugInfoSafe(table, put.getRow()) + "; errors: "
+      LOG.error("Failed to insert: " + keyBase + " after " + (System.currentTimeMillis() - start)
+          + "ms; region information: " + getRegionDebugInfoSafe(table, put.getRow()) + "; errors: "
           + exceptionInfo);
     }
   }
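
For readers applying the same cleanups to their own code, the sketch below distills the two idioms this patch uses repeatedly: guarding TRACE-level string concatenation behind `isTraceEnabled()`, and null-checking a possibly-empty collection before calling `toArray()`, as in the `ChaosMonkey.getCurrentServers()` fix. This is a minimal, self-contained illustration against commons-logging (the logging API HBase used at the time); the class and method names here are illustrative, not part of the patch.

```java
import java.util.Collection;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class LogFixupIdioms {
  private static final Log LOG = LogFactory.getLog(LogFixupIdioms.class);

  /**
   * Idiom 1: only build the log message when the level is enabled, so the
   * string concatenation costs nothing on the hot path when TRACE is off.
   */
  void traceCachedLocation(String regionName, String hostnamePort) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Cached location for " + regionName + " is " + hostnamePort);
    }
  }

  /**
   * Idiom 2: guard against a null or empty collection before toArray(),
   * returning an empty array instead of risking an NPE.
   */
  String[] toArraySafely(Collection<String> items) {
    if (items == null || items.isEmpty()) return new String[] {};
    return items.toArray(new String[items.size()]);
  }
}
```

A related detail worth copying from the `ServerCallable` hunk: passing the `Throwable` as the second argument to `LOG.warn(message, t)` preserves the full stack trace in the log, whereas concatenating `t` into the message flattens it to a one-line `toString()`.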