From a3856ca9457b0a4a2b0e5f2504b2b0332cbf0e51 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Mon, 20 Sep 2010 19:04:41 +0000 Subject: [PATCH] HBASE-3017 More log pruning M conf/log4j.properties Make ZKW log at INFO-level M src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Log message cleanup. M src/main/java/org/apache/hadoop/hbase/master/HMaster.java Remove excessive hostname+port qualifier on master for zk messages M src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Log message cleanup M src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Format the ServerMonitor message. M src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Remove excessive hostname on zk message id; just add port M src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java Cleanup of messages. git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@999057 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + conf/log4j.properties | 4 +++- .../hbase/master/AssignmentManager.java | 6 +++--- .../apache/hadoop/hbase/master/HMaster.java | 3 +-- .../hadoop/hbase/master/MasterFileSystem.java | 10 +++++----- .../hadoop/hbase/master/ServerManager.java | 19 +++++++++++------- .../hbase/regionserver/HRegionServer.java | 4 ++-- .../hadoop/hbase/zookeeper/ZKAssign.java | 20 +++++++++---------- 8 files changed, 37 insertions(+), 30 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 796a42c3210..8340b6e9619 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -919,6 +919,7 @@ Release 0.21.0 - Unreleased HBASE-2988 Support alternate compression for major compactions HBASE-2941 port HADOOP-6713 - threading scalability for RPC reads - to HBase HBASE-2782 QOS for META table access + HBASE-3017 More log pruning NEW FEATURES HBASE-1961 HBase EC2 scripts diff --git a/conf/log4j.properties b/conf/log4j.properties index ec341410a9c..62b55211383 100644 --- a/conf/log4j.properties +++ b/conf/log4j.properties @@ -43,6 +43,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: log4j.logger.org.apache.zookeeper=INFO #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG log4j.logger.org.apache.hadoop.hbase=DEBUG +# Make these two classes INFO-level. Make them DEBUG to see more zk debug. +log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO +log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO #log4j.logger.org.apache.hadoop.dfs=DEBUG # Set this class to log INFO only otherwise its OTT -log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO diff --git a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 557bc0499cd..8607267af62 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -253,7 +253,7 @@ public class AssignmentManager extends ZooKeeperListener { RegionState regionState = regionsInTransition.get(encodedName); switch(data.getEventType()) { case M2ZK_REGION_OFFLINE: - LOG.warn("What to do with this event? " + data); + // Nothing to do. break; case RS2ZK_REGION_CLOSING: @@ -545,8 +545,8 @@ public class AssignmentManager extends ZooKeeperListener { if (plan == null) { LOG.debug("No previous transition plan for " + state.getRegion().getRegionNameAsString() + - " so generating a random one from " + serverManager.countOfRegionServers() + - " ( " + serverManager.getOnlineServers().size() + ") available servers"); + " so generating a random one; " + serverManager.countOfRegionServers() + + " (online=" + serverManager.getOnlineServers().size() + ") available servers"); plan = new RegionPlan(state.getRegion(), null, LoadBalancer.randomAssignment(serverManager.getOnlineServersList())); regionPlans.put(encodedName, plan); diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index bccdc0e502e..f580c0b9313 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -203,8 +203,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { "_" + System.currentTimeMillis()); } - this.zooKeeper = - new ZooKeeperWatcher(conf, MASTER + "-" + getMasterAddress(), this); + this.zooKeeper = new ZooKeeperWatcher(conf, MASTER, this); /* * 2. Block on becoming the active master. diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index d870d443f0c..04650edb8f2 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -165,13 +165,13 @@ public class MasterFileSystem { } for (FileStatus status : logFolders) { String serverName = status.getPath().getName(); - LOG.info("Found log folder : " + serverName); - if(onlineServers.get(serverName) == null) { - LOG.info("Log folder doesn't belong " + + if (onlineServers.get(serverName) == null) { + LOG.info("Log folder " + status.getPath() + " doesn't belong " + "to a known region server, splitting"); splitLog(serverName); } else { - LOG.info("Log folder belongs to an existing region server"); + LOG.info("Log folder " + status.getPath() + + " belongs to an existing region server"); } } } @@ -279,4 +279,4 @@ public class MasterFileSystem { new Path(rootdir, region.getTableDesc().getNameAsString()), region.getEncodedName(), familyName), true); } -} \ No newline at end of file +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 581fd108644..3c6b09cce29 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.text.DecimalFormat; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -93,6 +94,8 @@ public class ServerManager { private final DeadServer deadservers = new DeadServer(); + private static final DecimalFormat DF = new DecimalFormat("#.##"); + /** * Dumps into log current stats on dead servers and number of servers * TODO: Make this a metric; dump metrics into log. @@ -108,10 +111,9 @@ public class ServerManager { int numDeadServers = deadservers.size(); double averageLoad = getAverageLoad(); String deadServersList = deadservers.toString(); - LOG.info(numServers + " region servers, " + numDeadServers + - " dead, average load " + averageLoad + - ((deadServersList != null && deadServersList.length() > 0)? - deadServersList: "")); + LOG.info("regionservers=" + numServers + + ", averageload=" + DF.format(averageLoad) + + ((numDeadServers > 0)? ("deadservers=" + deadServersList): "")); } } @@ -422,7 +424,7 @@ public class ServerManager { LOG.info("Waiting on following regionserver(s) to go down " + this.onlineServers.values()); try { - this.onlineServers.wait(500); + this.onlineServers.wait(1000); } catch (InterruptedException e) { // continue } @@ -516,7 +518,7 @@ public class ServerManager { HConnectionManager.getConnection(this.master.getConfiguration()); HRegionInterface hri = serverConnections.get(info.getServerName()); if (hri == null) { - LOG.info("new connection"); + LOG.debug("New connection to " + info.getServerName()); hri = connection.getHRegionConnection(info.getServerAddress(), false); serverConnections.put(info.getServerName(), hri); } @@ -537,9 +539,10 @@ public class ServerManager { getLong("hbase.master.wait.on.regionservers.interval", 3000); // So, number of regionservers > 0 and its been n since last check in, break, // else just stall here + int count = 0; for (int oldcount = countOfRegionServers(); !this.master.isStopped();) { Thread.sleep(interval); - int count = countOfRegionServers(); + count = countOfRegionServers(); if (count == oldcount && count > 0) break; if (count == 0) { LOG.info("Waiting on regionserver(s) to checkin"); @@ -548,6 +551,8 @@ public class ServerManager { } oldcount = count; } + LOG.info("Exiting wait on regionserver(s) to checkin; count=" + count + + ", stopped=" + this.master.isStopped()); } public List getOnlineServersList() { diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index f2e4e7cbae3..afbc77b34f7 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -431,8 +431,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, private void initializeZooKeeper() throws IOException, InterruptedException { // open connection to zookeeper and set primary watcher - zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + "-" - + serverInfo.getServerName(), this); + zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" + + serverInfo.getServerAddress().getPort(), this); this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this); this.clusterStatusTracker.start(); diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java index a26ff65eefc..04a30cf831b 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java @@ -135,8 +135,8 @@ public class ZKAssign { public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, String serverName, final EventType event) throws KeeperException, KeeperException.NodeExistsException { - LOG.debug(zkw.prefix("Creating an unassigned node for " + - region.getEncodedName() + " in an OFFLINE state")); + LOG.debug(zkw.prefix("Creating unassigned node for " + + region.getEncodedName() + " in OFFLINE state")); RegionTransitionData data = new RegionTransitionData(event, region.getRegionName(), serverName); synchronized(zkw.getNodes()) { @@ -167,8 +167,8 @@ public class ZKAssign { public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, String serverName) throws KeeperException, KeeperException.NoNodeException { - LOG.debug(zkw.prefix("Forcing an existing unassigned node for " + - region.getEncodedName() + " to an OFFLINE state")); + LOG.debug(zkw.prefix("Forcing existing unassigned node for " + + region.getEncodedName() + " to OFFLINE state")); RegionTransitionData data = new RegionTransitionData( EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName); synchronized(zkw.getNodes()) { @@ -200,8 +200,8 @@ public class ZKAssign { public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, String serverName) throws KeeperException { - LOG.debug(zkw.prefix("Creating or updating an unassigned node for " + - region.getEncodedName() + " with an OFFLINE state")); + LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " + + region.getEncodedName() + " with OFFLINE state")); RegionTransitionData data = new RegionTransitionData( EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName); synchronized(zkw.getNodes()) { @@ -319,7 +319,7 @@ public class ZKAssign { private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName, EventType expectedState) throws KeeperException, KeeperException.NoNodeException { - LOG.debug(zkw.prefix("Deleting an existing unassigned " + + LOG.debug(zkw.prefix("Deleting existing unassigned " + "node for " + regionName + " that is in expected state " + expectedState)); String node = getNodeName(zkw, regionName); Stat stat = new Stat(); @@ -329,7 +329,7 @@ public class ZKAssign { } RegionTransitionData data = RegionTransitionData.fromBytes(bytes); if(!data.getEventType().equals(expectedState)) { - LOG.warn(zkw.prefix("Attempting to delete an unassigned " + + LOG.warn(zkw.prefix("Attempting to delete unassigned " + "node in " + expectedState + " state but node is in " + data.getEventType() + " state")); return false; @@ -338,7 +338,7 @@ public class ZKAssign { // TODO: Does this go here or only if we successfully delete node? zkw.getNodes().remove(node); if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { - LOG.warn(zkw.prefix("Attempting to delete an " + + LOG.warn(zkw.prefix("Attempting to delete " + "unassigned node in " + expectedState + " state but " + "after verifying it was in OPENED state, we got a version mismatch")); @@ -392,7 +392,7 @@ public class ZKAssign { public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, String serverName) throws KeeperException, KeeperException.NodeExistsException { - LOG.debug(zkw.prefix("Creating an unassigned node for " + + LOG.debug(zkw.prefix("Creating unassigned node for " + region.getEncodedName() + " in a CLOSING state")); RegionTransitionData data = new RegionTransitionData( EventType.RS2ZK_REGION_CLOSING, region.getRegionName(), serverName);