HBASE-3017 More log pruning

M conf/log4j.properties
  Make ZKW log at INFO-level
M src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
  Log message cleanup.
M src/main/java/org/apache/hadoop/hbase/master/HMaster.java
  Remove excessive hostname+port qualifier on master for zk messages
M src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
  Log message cleanup
M src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Format the ServerMonitor message.
M src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
  Remove excessive hostname on zk message id; just add port
M src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
  Cleanup of messages.


git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@999057 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-09-20 19:04:41 +00:00
parent 7daffb3a42
commit a3856ca945
8 changed files with 37 additions and 30 deletions

View File

@ -919,6 +919,7 @@ Release 0.21.0 - Unreleased
HBASE-2988 Support alternate compression for major compactions
HBASE-2941 port HADOOP-6713 - threading scalability for RPC reads - to HBase
HBASE-2782 QOS for META table access
HBASE-3017 More log pruning
NEW FEATURES
HBASE-1961 HBase EC2 scripts

View File

@ -43,6 +43,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}:
log4j.logger.org.apache.zookeeper=INFO
#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
log4j.logger.org.apache.hadoop.hbase=DEBUG
# Make these two classes INFO-level. Make them DEBUG to see more zk debug.
log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO
log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO
#log4j.logger.org.apache.hadoop.dfs=DEBUG
# Set this class to log INFO only otherwise its OTT
log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO

View File

@ -253,7 +253,7 @@ public class AssignmentManager extends ZooKeeperListener {
RegionState regionState = regionsInTransition.get(encodedName);
switch(data.getEventType()) {
case M2ZK_REGION_OFFLINE:
LOG.warn("What to do with this event? " + data);
// Nothing to do.
break;
case RS2ZK_REGION_CLOSING:
@ -545,8 +545,8 @@ public class AssignmentManager extends ZooKeeperListener {
if (plan == null) {
LOG.debug("No previous transition plan for " +
state.getRegion().getRegionNameAsString() +
" so generating a random one from " + serverManager.countOfRegionServers() +
" ( " + serverManager.getOnlineServers().size() + ") available servers");
" so generating a random one; " + serverManager.countOfRegionServers() +
" (online=" + serverManager.getOnlineServers().size() + ") available servers");
plan = new RegionPlan(state.getRegion(), null,
LoadBalancer.randomAssignment(serverManager.getOnlineServersList()));
regionPlans.put(encodedName, plan);

View File

@ -203,8 +203,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
"_" + System.currentTimeMillis());
}
this.zooKeeper =
new ZooKeeperWatcher(conf, MASTER + "-" + getMasterAddress(), this);
this.zooKeeper = new ZooKeeperWatcher(conf, MASTER, this);
/*
* 2. Block on becoming the active master.

View File

@ -165,13 +165,13 @@ public class MasterFileSystem {
}
for (FileStatus status : logFolders) {
String serverName = status.getPath().getName();
LOG.info("Found log folder : " + serverName);
if(onlineServers.get(serverName) == null) {
LOG.info("Log folder doesn't belong " +
if (onlineServers.get(serverName) == null) {
LOG.info("Log folder " + status.getPath() + " doesn't belong " +
"to a known region server, splitting");
splitLog(serverName);
} else {
LOG.info("Log folder belongs to an existing region server");
LOG.info("Log folder " + status.getPath() +
" belongs to an existing region server");
}
}
}
@ -279,4 +279,4 @@ public class MasterFileSystem {
new Path(rootdir, region.getTableDesc().getNameAsString()),
region.getEncodedName(), familyName), true);
}
}
}

View File

@ -27,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.text.DecimalFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -93,6 +94,8 @@ public class ServerManager {
private final DeadServer deadservers = new DeadServer();
private static final DecimalFormat DF = new DecimalFormat("#.##");
/**
* Dumps into log current stats on dead servers and number of servers
* TODO: Make this a metric; dump metrics into log.
@ -108,10 +111,9 @@ public class ServerManager {
int numDeadServers = deadservers.size();
double averageLoad = getAverageLoad();
String deadServersList = deadservers.toString();
LOG.info(numServers + " region servers, " + numDeadServers +
" dead, average load " + averageLoad +
((deadServersList != null && deadServersList.length() > 0)?
deadServersList: ""));
LOG.info("regionservers=" + numServers +
", averageload=" + DF.format(averageLoad) +
((numDeadServers > 0)? ("deadservers=" + deadServersList): ""));
}
}
@ -422,7 +424,7 @@ public class ServerManager {
LOG.info("Waiting on following regionserver(s) to go down " +
this.onlineServers.values());
try {
this.onlineServers.wait(500);
this.onlineServers.wait(1000);
} catch (InterruptedException e) {
// continue
}
@ -516,7 +518,7 @@ public class ServerManager {
HConnectionManager.getConnection(this.master.getConfiguration());
HRegionInterface hri = serverConnections.get(info.getServerName());
if (hri == null) {
LOG.info("new connection");
LOG.debug("New connection to " + info.getServerName());
hri = connection.getHRegionConnection(info.getServerAddress(), false);
serverConnections.put(info.getServerName(), hri);
}
@ -537,9 +539,10 @@ public class ServerManager {
getLong("hbase.master.wait.on.regionservers.interval", 3000);
// So, number of regionservers > 0 and its been n since last check in, break,
// else just stall here
int count = 0;
for (int oldcount = countOfRegionServers(); !this.master.isStopped();) {
Thread.sleep(interval);
int count = countOfRegionServers();
count = countOfRegionServers();
if (count == oldcount && count > 0) break;
if (count == 0) {
LOG.info("Waiting on regionserver(s) to checkin");
@ -548,6 +551,8 @@ public class ServerManager {
}
oldcount = count;
}
LOG.info("Exiting wait on regionserver(s) to checkin; count=" + count +
", stopped=" + this.master.isStopped());
}
public List<HServerInfo> getOnlineServersList() {

View File

@ -431,8 +431,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
private void initializeZooKeeper() throws IOException, InterruptedException {
// open connection to zookeeper and set primary watcher
zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + "-"
+ serverInfo.getServerName(), this);
zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" +
serverInfo.getServerAddress().getPort(), this);
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
this.clusterStatusTracker.start();

View File

@ -135,8 +135,8 @@ public class ZKAssign {
public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
String serverName, final EventType event)
throws KeeperException, KeeperException.NodeExistsException {
LOG.debug(zkw.prefix("Creating an unassigned node for " +
region.getEncodedName() + " in an OFFLINE state"));
LOG.debug(zkw.prefix("Creating unassigned node for " +
region.getEncodedName() + " in OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(event,
region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
@ -167,8 +167,8 @@ public class ZKAssign {
public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
String serverName)
throws KeeperException, KeeperException.NoNodeException {
LOG.debug(zkw.prefix("Forcing an existing unassigned node for " +
region.getEncodedName() + " to an OFFLINE state"));
LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
region.getEncodedName() + " to OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
@ -200,8 +200,8 @@ public class ZKAssign {
public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw,
HRegionInfo region, String serverName)
throws KeeperException {
LOG.debug(zkw.prefix("Creating or updating an unassigned node for " +
region.getEncodedName() + " with an OFFLINE state"));
LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
region.getEncodedName() + " with OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
@ -319,7 +319,7 @@ public class ZKAssign {
private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
EventType expectedState)
throws KeeperException, KeeperException.NoNodeException {
LOG.debug(zkw.prefix("Deleting an existing unassigned " +
LOG.debug(zkw.prefix("Deleting existing unassigned " +
"node for " + regionName + " that is in expected state " + expectedState));
String node = getNodeName(zkw, regionName);
Stat stat = new Stat();
@ -329,7 +329,7 @@ public class ZKAssign {
}
RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
if(!data.getEventType().equals(expectedState)) {
LOG.warn(zkw.prefix("Attempting to delete an unassigned " +
LOG.warn(zkw.prefix("Attempting to delete unassigned " +
"node in " + expectedState +
" state but node is in " + data.getEventType() + " state"));
return false;
@ -338,7 +338,7 @@ public class ZKAssign {
// TODO: Does this go here or only if we successfully delete node?
zkw.getNodes().remove(node);
if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
LOG.warn(zkw.prefix("Attempting to delete an " +
LOG.warn(zkw.prefix("Attempting to delete " +
"unassigned node in " + expectedState +
" state but " +
"after verifying it was in OPENED state, we got a version mismatch"));
@ -392,7 +392,7 @@ public class ZKAssign {
public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
String serverName)
throws KeeperException, KeeperException.NodeExistsException {
LOG.debug(zkw.prefix("Creating an unassigned node for " +
LOG.debug(zkw.prefix("Creating unassigned node for " +
region.getEncodedName() + " in a CLOSING state"));
RegionTransitionData data = new RegionTransitionData(
EventType.RS2ZK_REGION_CLOSING, region.getRegionName(), serverName);