HBASE-12238 A few ugly exceptions on startup

Conflicts:
	hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
This commit is contained in:
stack 2014-10-29 21:42:29 -07:00
parent dc78831019
commit b069c10af4
4 changed files with 57 additions and 22 deletions

View File

@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
@ -48,6 +49,9 @@ import java.net.SocketException;
import java.net.SocketTimeoutException; import java.net.SocketTimeoutException;
import java.rmi.UnknownHostException; import java.rmi.UnknownHostException;
import java.util.List;
import java.util.ArrayList;
/** /**
* Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper
* which keeps hbase:meta region server location. * which keeps hbase:meta region server location.
@ -78,15 +82,38 @@ public class MetaTableLocator {
* @return true if meta region location is available, false if not * @return true if meta region location is available, false if not
*/ */
public boolean isLocationAvailable(ZooKeeperWatcher zkw) { public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
try { return getMetaRegionLocation(zkw) != null;
return ZKUtil.getData(zkw, zkw.metaServerZNode) != null;
} catch(KeeperException e) {
LOG.error("ZK error trying to get hbase:meta from ZooKeeper");
return false;
} catch (InterruptedException e) {
LOG.error("ZK error trying to get hbase:meta from ZooKeeper");
return false;
} }
/**
 * Lists the hbase:meta regions together with the servers hosting them.
 * <p>
 * The returned list always holds exactly one pair: {@code HRegionInfo.FIRST_META_REGIONINFO}
 * and whatever {@code getMetaRegionLocation(zkw)} reports for it. The pair is added
 * unconditionally, so its server half is null if that lookup yields null.
 *
 * @param zkw ZooKeeper watcher to be used
 * @return meta table regions and their locations.
 */
public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
  // Look the location up through this instance, the same way isLocationAvailable does,
  // instead of allocating a throwaway MetaTableLocator just to make the call.
  ServerName serverName = getMetaRegionLocation(zkw);
  List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
  list.add(new Pair<HRegionInfo, ServerName>(HRegionInfo.FIRST_META_REGIONINFO, serverName));
  return list;
}
/**
 * Lists the hbase:meta regions without their server locations.
 * <p>
 * Delegates to {@code getMetaRegionsAndLocations(zkw)} and keeps only the region half of
 * each pair via {@code getListOfHRegionInfos}.
 *
 * @param zkw ZooKeeper watcher to be used
 * @return List of meta regions
 */
public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
  return getListOfHRegionInfos(getMetaRegionsAndLocations(zkw));
}
/**
 * Extracts the region half of each (region, server) pair.
 *
 * @param pairs region/server pairs; may be null or empty
 * @return the first element of every pair, in iteration order, or null when
 *   {@code pairs} is null or empty
 */
private List<HRegionInfo> getListOfHRegionInfos(
final List<Pair<HRegionInfo, ServerName>> pairs) {
// Null, not an empty list, is the result when there is nothing to convert.
if (pairs == null || pairs.isEmpty()) return null;
// Presized to the input length because exactly one entry is added per pair.
List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
for (Pair<HRegionInfo, ServerName> pair: pairs) {
result.add(pair.getFirst());
}
return result;
} }
/** /**
@ -239,7 +266,7 @@ public class MetaTableLocator {
} }
} }
LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) + LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
" at address=" + address + ", exception=" + t); " at address=" + address + ", exception=" + t.getMessage());
return false; return false;
} }

View File

@ -256,7 +256,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
* </ol> * </ol>
* <p> * <p>
* Remaining steps of initialization occur in * Remaining steps of initialization occur in
* {@link #finishActiveMasterInitialization(MonitoredTask)} after * #finishActiveMasterInitialization(MonitoredTask) after
* the master becomes the active one. * the master becomes the active one.
* *
* @throws InterruptedException * @throws InterruptedException
@ -286,6 +286,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this)); this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
// Do we publish the status? // Do we publish the status?
boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED, boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
HConstants.STATUS_PUBLISHED_DEFAULT); HConstants.STATUS_PUBLISHED_DEFAULT);
Class<? extends ClusterStatusPublisher.Publisher> publisherClass = Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
@ -1261,8 +1262,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
* this node explicitly. If we crash before then, ZooKeeper will delete * this node explicitly. If we crash before then, ZooKeeper will delete
* this node for us since it is ephemeral. * this node for us since it is ephemeral.
*/ */
LOG.info("Adding ZNode for " + backupZNode + " in backup master directory"); LOG.info("Adding backup master ZNode " + backupZNode);
MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName); if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName)) {
LOG.warn("Failed create of " + backupZNode + " by " + serverName);
}
activeMasterManager = new ActiveMasterManager(zooKeeper, serverName, this); activeMasterManager = new ActiveMasterManager(zooKeeper, serverName, this);
// Start a thread to try to become the active master, so we won't block here // Start a thread to try to become the active master, so we won't block here

View File

@ -166,7 +166,10 @@ public class HMasterCommandLine extends ServerCommandLine {
// login the zookeeper server principal (if using security) // login the zookeeper server principal (if using security)
ZKUtil.loginServer(conf, "hbase.zookeeper.server.keytab.file", ZKUtil.loginServer(conf, "hbase.zookeeper.server.keytab.file",
"hbase.zookeeper.server.kerberos.principal", null); "hbase.zookeeper.server.kerberos.principal", null);
int localZKClusterSessionTimeout =
conf.getInt(HConstants.ZK_SESSION_TIMEOUT + ".localHBaseCluster", 10*1000);
conf.setInt(HConstants.ZK_SESSION_TIMEOUT, localZKClusterSessionTimeout);
LOG.info("Starting a zookeeper cluster");
int clientPort = zooKeeperCluster.startup(zkDataPath); int clientPort = zooKeeperCluster.startup(zkDataPath);
if (clientPort != zkClientPort) { if (clientPort != zkClientPort) {
String errorMsg = "Could not start ZK at requested port of " + String errorMsg = "Could not start ZK at requested port of " +
@ -176,13 +179,15 @@ public class HMasterCommandLine extends ServerCommandLine {
System.err.println(errorMsg); System.err.println(errorMsg);
throw new IOException(errorMsg); throw new IOException(errorMsg);
} }
conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
Integer.toString(clientPort));
conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 10 *1000);
// Need to have the zk cluster shutdown when master is shutdown. // Need to have the zk cluster shutdown when master is shutdown.
// Run a subclass that does the zk cluster shutdown on its way out. // Run a subclass that does the zk cluster shutdown on its way out.
LocalHBaseCluster cluster = new LocalHBaseCluster(conf, conf.getInt("hbase.masters", 1), int mastersCount = conf.getInt("hbase.masters", 1);
conf.getInt("hbase.regionservers", 1), LocalHMaster.class, HRegionServer.class); int regionServersCount = conf.getInt("hbase.regionservers", 1);
LOG.info("Starting up instance of localHBaseCluster; master=" + mastersCount +
", regionserversCount=" + regionServersCount);
LocalHBaseCluster cluster = new LocalHBaseCluster(conf, mastersCount, regionServersCount,
LocalHMaster.class, HRegionServer.class);
((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster); ((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster);
cluster.startup(); cluster.startup();
waitOnMasterThreads(cluster); waitOnMasterThreads(cluster);

View File

@ -165,8 +165,7 @@ public class MiniZooKeeperCluster {
standaloneServerFactory = new NIOServerCnxnFactory(); standaloneServerFactory = new NIOServerCnxnFactory();
standaloneServerFactory.configure( standaloneServerFactory.configure(
new InetSocketAddress(tentativePort), new InetSocketAddress(tentativePort),
configuration.getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, configuration.getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, 1000));
1000));
} catch (BindException e) { } catch (BindException e) {
LOG.debug("Failed binding ZK Server to client port: " + LOG.debug("Failed binding ZK Server to client port: " +
tentativePort, e); tentativePort, e);
@ -181,6 +180,7 @@ public class MiniZooKeeperCluster {
// Start up this ZK server // Start up this ZK server
standaloneServerFactory.startup(server); standaloneServerFactory.startup(server);
// Runs a 'stat' against the servers.
if (!waitForServerUp(tentativePort, CONNECTION_TIMEOUT)) { if (!waitForServerUp(tentativePort, CONNECTION_TIMEOUT)) {
throw new IOException("Waiting for startup of standalone server"); throw new IOException("Waiting for startup of standalone server");
} }
@ -196,8 +196,8 @@ public class MiniZooKeeperCluster {
activeZKServerIndex = 0; activeZKServerIndex = 0;
started = true; started = true;
int clientPort = clientPortList.get(activeZKServerIndex); int clientPort = clientPortList.get(activeZKServerIndex);
LOG.info("Started MiniZK Cluster and connect 1 ZK server " + LOG.info("Started MiniZooKeeperCluster and ran successful 'stat' " +
"on client port: " + clientPort); "on client port=" + clientPort);
return clientPort; return clientPort;
} }