HBASE-3017 More log pruning
M conf/log4j.properties
  Make ZKW log at INFO-level
M src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
  Log message cleanup.
M src/main/java/org/apache/hadoop/hbase/master/HMaster.java
  Remove excessive hostname+port qualifier on master for zk messages
M src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
  Log message cleanup
M src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Format the ServerMonitor message.
M src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
  Remove excessive hostname on zk message id; just add port
M src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
  Cleanup of messages.

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@999057 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7daffb3a42
commit
a3856ca945
|
@ -919,6 +919,7 @@ Release 0.21.0 - Unreleased
|
|||
HBASE-2988 Support alternate compression for major compactions
|
||||
HBASE-2941 port HADOOP-6713 - threading scalability for RPC reads - to HBase
|
||||
HBASE-2782 QOS for META table access
|
||||
HBASE-3017 More log pruning
|
||||
|
||||
NEW FEATURES
|
||||
HBASE-1961 HBase EC2 scripts
|
||||
|
|
|
@ -43,6 +43,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}:
|
|||
log4j.logger.org.apache.zookeeper=INFO
|
||||
#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=DEBUG
|
||||
# Make these two classes INFO-level. Make them DEBUG to see more zk debug.
|
||||
log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO
|
||||
log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO
|
||||
#log4j.logger.org.apache.hadoop.dfs=DEBUG
|
||||
# Set this class to log at INFO only; otherwise it's over the top (OTT)
|
||||
log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO
|
||||
|
|
|
@ -253,7 +253,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
RegionState regionState = regionsInTransition.get(encodedName);
|
||||
switch(data.getEventType()) {
|
||||
case M2ZK_REGION_OFFLINE:
|
||||
LOG.warn("What to do with this event? " + data);
|
||||
// Nothing to do.
|
||||
break;
|
||||
|
||||
case RS2ZK_REGION_CLOSING:
|
||||
|
@ -545,8 +545,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
if (plan == null) {
|
||||
LOG.debug("No previous transition plan for " +
|
||||
state.getRegion().getRegionNameAsString() +
|
||||
" so generating a random one from " + serverManager.countOfRegionServers() +
|
||||
" ( " + serverManager.getOnlineServers().size() + ") available servers");
|
||||
" so generating a random one; " + serverManager.countOfRegionServers() +
|
||||
" (online=" + serverManager.getOnlineServers().size() + ") available servers");
|
||||
plan = new RegionPlan(state.getRegion(), null,
|
||||
LoadBalancer.randomAssignment(serverManager.getOnlineServersList()));
|
||||
regionPlans.put(encodedName, plan);
|
||||
|
|
|
@ -203,8 +203,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
"_" + System.currentTimeMillis());
|
||||
}
|
||||
|
||||
this.zooKeeper =
|
||||
new ZooKeeperWatcher(conf, MASTER + "-" + getMasterAddress(), this);
|
||||
this.zooKeeper = new ZooKeeperWatcher(conf, MASTER, this);
|
||||
|
||||
/*
|
||||
* 2. Block on becoming the active master.
|
||||
|
|
|
@ -165,13 +165,13 @@ public class MasterFileSystem {
|
|||
}
|
||||
for (FileStatus status : logFolders) {
|
||||
String serverName = status.getPath().getName();
|
||||
LOG.info("Found log folder : " + serverName);
|
||||
if(onlineServers.get(serverName) == null) {
|
||||
LOG.info("Log folder doesn't belong " +
|
||||
if (onlineServers.get(serverName) == null) {
|
||||
LOG.info("Log folder " + status.getPath() + " doesn't belong " +
|
||||
"to a known region server, splitting");
|
||||
splitLog(serverName);
|
||||
} else {
|
||||
LOG.info("Log folder belongs to an existing region server");
|
||||
LOG.info("Log folder " + status.getPath() +
|
||||
" belongs to an existing region server");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.text.DecimalFormat;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -93,6 +94,8 @@ public class ServerManager {
|
|||
|
||||
private final DeadServer deadservers = new DeadServer();
|
||||
|
||||
private static final DecimalFormat DF = new DecimalFormat("#.##");
|
||||
|
||||
/**
|
||||
* Dumps into log current stats on dead servers and number of servers
|
||||
* TODO: Make this a metric; dump metrics into log.
|
||||
|
@ -108,10 +111,9 @@ public class ServerManager {
|
|||
int numDeadServers = deadservers.size();
|
||||
double averageLoad = getAverageLoad();
|
||||
String deadServersList = deadservers.toString();
|
||||
LOG.info(numServers + " region servers, " + numDeadServers +
|
||||
" dead, average load " + averageLoad +
|
||||
((deadServersList != null && deadServersList.length() > 0)?
|
||||
deadServersList: ""));
|
||||
LOG.info("regionservers=" + numServers +
|
||||
", averageload=" + DF.format(averageLoad) +
|
||||
((numDeadServers > 0)? ("deadservers=" + deadServersList): ""));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -422,7 +424,7 @@ public class ServerManager {
|
|||
LOG.info("Waiting on following regionserver(s) to go down " +
|
||||
this.onlineServers.values());
|
||||
try {
|
||||
this.onlineServers.wait(500);
|
||||
this.onlineServers.wait(1000);
|
||||
} catch (InterruptedException e) {
|
||||
// continue
|
||||
}
|
||||
|
@ -516,7 +518,7 @@ public class ServerManager {
|
|||
HConnectionManager.getConnection(this.master.getConfiguration());
|
||||
HRegionInterface hri = serverConnections.get(info.getServerName());
|
||||
if (hri == null) {
|
||||
LOG.info("new connection");
|
||||
LOG.debug("New connection to " + info.getServerName());
|
||||
hri = connection.getHRegionConnection(info.getServerAddress(), false);
|
||||
serverConnections.put(info.getServerName(), hri);
|
||||
}
|
||||
|
@ -537,9 +539,10 @@ public class ServerManager {
|
|||
getLong("hbase.master.wait.on.regionservers.interval", 3000);
|
||||
// So, number of regionservers > 0 and it's been n since last check in, break,
|
||||
// else just stall here
|
||||
int count = 0;
|
||||
for (int oldcount = countOfRegionServers(); !this.master.isStopped();) {
|
||||
Thread.sleep(interval);
|
||||
int count = countOfRegionServers();
|
||||
count = countOfRegionServers();
|
||||
if (count == oldcount && count > 0) break;
|
||||
if (count == 0) {
|
||||
LOG.info("Waiting on regionserver(s) to checkin");
|
||||
|
@ -548,6 +551,8 @@ public class ServerManager {
|
|||
}
|
||||
oldcount = count;
|
||||
}
|
||||
LOG.info("Exiting wait on regionserver(s) to checkin; count=" + count +
|
||||
", stopped=" + this.master.isStopped());
|
||||
}
|
||||
|
||||
public List<HServerInfo> getOnlineServersList() {
|
||||
|
|
|
@ -431,8 +431,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
|||
|
||||
private void initializeZooKeeper() throws IOException, InterruptedException {
|
||||
// open connection to zookeeper and set primary watcher
|
||||
zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + "-"
|
||||
+ serverInfo.getServerName(), this);
|
||||
zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" +
|
||||
serverInfo.getServerAddress().getPort(), this);
|
||||
|
||||
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
|
||||
this.clusterStatusTracker.start();
|
||||
|
|
|
@ -135,8 +135,8 @@ public class ZKAssign {
|
|||
public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
|
||||
String serverName, final EventType event)
|
||||
throws KeeperException, KeeperException.NodeExistsException {
|
||||
LOG.debug(zkw.prefix("Creating an unassigned node for " +
|
||||
region.getEncodedName() + " in an OFFLINE state"));
|
||||
LOG.debug(zkw.prefix("Creating unassigned node for " +
|
||||
region.getEncodedName() + " in OFFLINE state"));
|
||||
RegionTransitionData data = new RegionTransitionData(event,
|
||||
region.getRegionName(), serverName);
|
||||
synchronized(zkw.getNodes()) {
|
||||
|
@ -167,8 +167,8 @@ public class ZKAssign {
|
|||
public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
|
||||
String serverName)
|
||||
throws KeeperException, KeeperException.NoNodeException {
|
||||
LOG.debug(zkw.prefix("Forcing an existing unassigned node for " +
|
||||
region.getEncodedName() + " to an OFFLINE state"));
|
||||
LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
|
||||
region.getEncodedName() + " to OFFLINE state"));
|
||||
RegionTransitionData data = new RegionTransitionData(
|
||||
EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName);
|
||||
synchronized(zkw.getNodes()) {
|
||||
|
@ -200,8 +200,8 @@ public class ZKAssign {
|
|||
public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw,
|
||||
HRegionInfo region, String serverName)
|
||||
throws KeeperException {
|
||||
LOG.debug(zkw.prefix("Creating or updating an unassigned node for " +
|
||||
region.getEncodedName() + " with an OFFLINE state"));
|
||||
LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
|
||||
region.getEncodedName() + " with OFFLINE state"));
|
||||
RegionTransitionData data = new RegionTransitionData(
|
||||
EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName);
|
||||
synchronized(zkw.getNodes()) {
|
||||
|
@ -319,7 +319,7 @@ public class ZKAssign {
|
|||
private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
|
||||
EventType expectedState)
|
||||
throws KeeperException, KeeperException.NoNodeException {
|
||||
LOG.debug(zkw.prefix("Deleting an existing unassigned " +
|
||||
LOG.debug(zkw.prefix("Deleting existing unassigned " +
|
||||
"node for " + regionName + " that is in expected state " + expectedState));
|
||||
String node = getNodeName(zkw, regionName);
|
||||
Stat stat = new Stat();
|
||||
|
@ -329,7 +329,7 @@ public class ZKAssign {
|
|||
}
|
||||
RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
|
||||
if(!data.getEventType().equals(expectedState)) {
|
||||
LOG.warn(zkw.prefix("Attempting to delete an unassigned " +
|
||||
LOG.warn(zkw.prefix("Attempting to delete unassigned " +
|
||||
"node in " + expectedState +
|
||||
" state but node is in " + data.getEventType() + " state"));
|
||||
return false;
|
||||
|
@ -338,7 +338,7 @@ public class ZKAssign {
|
|||
// TODO: Does this go here or only if we successfully delete node?
|
||||
zkw.getNodes().remove(node);
|
||||
if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
|
||||
LOG.warn(zkw.prefix("Attempting to delete an " +
|
||||
LOG.warn(zkw.prefix("Attempting to delete " +
|
||||
"unassigned node in " + expectedState +
|
||||
" state but " +
|
||||
"after verifying it was in OPENED state, we got a version mismatch"));
|
||||
|
@ -392,7 +392,7 @@ public class ZKAssign {
|
|||
public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
|
||||
String serverName)
|
||||
throws KeeperException, KeeperException.NodeExistsException {
|
||||
LOG.debug(zkw.prefix("Creating an unassigned node for " +
|
||||
LOG.debug(zkw.prefix("Creating unassigned node for " +
|
||||
region.getEncodedName() + " in a CLOSING state"));
|
||||
RegionTransitionData data = new RegionTransitionData(
|
||||
EventType.RS2ZK_REGION_CLOSING, region.getRegionName(), serverName);
|
||||
|
|
Loading…
Reference in New Issue