HBASE-2758 META region stuck in RS2ZK_REGION_OPENED state (Karthik Ranganathan via jgray)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@957099 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
97657cccd2
commit
77fcd6cb63
|
@ -410,6 +410,8 @@ Release 0.21.0 - Unreleased
|
|||
HBASE-2769 Fix typo in warning message for HBaseConfiguration
|
||||
HBASE-2768 Fix teardown order in TestFilter
|
||||
HBASE-2763 Cross-port HADOOP-6833 IPC parameter leak bug
|
||||
HBASE-2758 META region stuck in RS2ZK_REGION_OPENED state
|
||||
(Karthik Ranganathan via jgray)
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-1760 Cleanup TODOs in HTable
|
||||
|
|
|
@ -162,6 +162,9 @@ public class HMaster extends Thread implements HMasterInterface,
|
|||
private Map<String, Integer> fragmentation = null;
|
||||
private final RegionServerOperationQueue regionServerOperationQueue;
|
||||
|
||||
// True if this is the master that started the cluster.
|
||||
boolean isClusterStartup;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param conf configuration
|
||||
|
@ -169,6 +172,14 @@ public class HMaster extends Thread implements HMasterInterface,
|
|||
*/
|
||||
public HMaster(Configuration conf) throws IOException {
|
||||
this.conf = conf;
|
||||
|
||||
// Figure out if this is a fresh cluster start. This is done by checking the
|
||||
// number of RS ephemeral nodes. RS ephemeral nodes are created only after
|
||||
// the primary master has written the address to ZK. So this has to be done
|
||||
// before we race to write our address to zookeeper.
|
||||
zooKeeperWrapper = ZooKeeperWrapper.createInstance(conf, HMaster.class.getName());
|
||||
isClusterStartup = (zooKeeperWrapper.scanRSDirectory().size() == 0);
|
||||
|
||||
// Set filesystem to be that of this.rootdir else we get complaints about
|
||||
// mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
|
||||
// default localfs. Presumption is that rootdir is fully-qualified before
|
||||
|
@ -206,8 +217,6 @@ public class HMaster extends Thread implements HMasterInterface,
|
|||
// We'll succeed if we are only master or if we win the race when many
|
||||
// masters. Otherwise we park here inside in writeAddressToZooKeeper.
|
||||
// TODO: Bring up the UI to redirect to active Master.
|
||||
zooKeeperWrapper =
|
||||
ZooKeeperWrapper.createInstance(conf, HMaster.class.getName());
|
||||
zooKeeperWrapper.registerListener(this);
|
||||
this.zkMasterAddressWatcher =
|
||||
new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
|
||||
|
@ -219,10 +228,10 @@ public class HMaster extends Thread implements HMasterInterface,
|
|||
serverManager = new ServerManager(this);
|
||||
|
||||
|
||||
// Start the unassigned watcher - which will create the unassgined region
|
||||
// Start the unassigned watcher - which will create the unassigned region
|
||||
// in ZK. This is needed before RegionManager() constructor tries to assign
|
||||
// the root region.
|
||||
ZKUnassignedWatcher.start(this.conf, serverManager, address.toString());
|
||||
ZKUnassignedWatcher.start(this.conf, this);
|
||||
// start the "close region" executor service
|
||||
HBaseEventType.RS2ZK_REGION_CLOSED.startMasterExecutorService(address.toString());
|
||||
// start the "open region" executor service
|
||||
|
@ -239,6 +248,22 @@ public class HMaster extends Thread implements HMasterInterface,
|
|||
LOG.info("HMaster initialized on " + this.address.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this master process was responsible for starting the
|
||||
* cluster.
|
||||
* @return the flag computed in the constructor (true when the scan of the
* region server directory in ZooKeeper came back empty), or false once
* resetClusterStartup() has been called
*/
|
||||
public boolean isClusterStartup() {
|
||||
return isClusterStartup;
|
||||
}
|
||||
|
||||
/**
 * Clears the cluster-startup flag, so that isClusterStartup() returns false
 * from this point on. There is no way to set the flag back to true.
 */
public void resetClusterStartup() {
|
||||
isClusterStartup = false;
|
||||
}
|
||||
|
||||
/**
 * @return the address field held by this master
 */
public HServerAddress getHServerAddress() {
|
||||
return address;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the rootdir. Make sure it's wholesome and exists before returning.
|
||||
* @param rd
|
||||
|
@ -1156,6 +1181,9 @@ public class HMaster extends Thread implements HMasterInterface,
|
|||
throw new Exception("Another Master is currently active");
|
||||
}
|
||||
|
||||
// we are a failed over master, reset the fact that we started the
|
||||
// cluster
|
||||
resetClusterStartup();
|
||||
// Verify the cluster to see if anything happened while we were away
|
||||
joinCluster();
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.master.handler.MasterCloseRegionHandler;
|
|||
import org.apache.hadoop.hbase.master.handler.MasterOpenRegionHandler;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper.ZNodePathAndData;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.WatchedEvent;
|
||||
import org.apache.zookeeper.Watcher;
|
||||
import org.apache.zookeeper.Watcher.Event.EventType;
|
||||
|
@ -45,20 +46,37 @@ public class ZKUnassignedWatcher implements Watcher {
|
|||
String serverName;
|
||||
ServerManager serverManager;
|
||||
|
||||
public static void start(Configuration conf, ServerManager serverManager,
|
||||
String serverName) throws IOException {
|
||||
new ZKUnassignedWatcher(conf, serverManager, serverName);
|
||||
public static void start(Configuration conf, HMaster master)
|
||||
throws IOException {
|
||||
new ZKUnassignedWatcher(conf, master);
|
||||
LOG.debug("Started ZKUnassigned watcher");
|
||||
}
|
||||
|
||||
public ZKUnassignedWatcher(Configuration conf, ServerManager serverManager,
|
||||
String serverName) throws IOException {
|
||||
this.serverName = serverName;
|
||||
this.serverManager = serverManager;
|
||||
zkWrapper =
|
||||
ZooKeeperWrapper.getInstance(conf, HMaster.class.getName());
|
||||
public ZKUnassignedWatcher(Configuration conf, HMaster master)
|
||||
throws IOException {
|
||||
this.serverName = master.getHServerAddress().toString();
|
||||
this.serverManager = master.getServerManager();
|
||||
zkWrapper = ZooKeeperWrapper.getInstance(conf, HMaster.class.getName());
|
||||
String unassignedZNode = zkWrapper.getRegionInTransitionZNode();
|
||||
|
||||
// If the UNASSIGNED ZNode exists and this is a fresh cluster start, then
|
||||
// delete it.
|
||||
if(master.isClusterStartup() && zkWrapper.exists(unassignedZNode, false)) {
|
||||
LOG.info("Cluster start, but found " + unassignedZNode + ", deleting it.");
|
||||
try {
|
||||
zkWrapper.deleteZNode(unassignedZNode, true);
|
||||
} catch (KeeperException e) {
|
||||
LOG.error("Could not delete znode " + unassignedZNode, e);
|
||||
throw new IOException(e);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Could not delete znode " + unassignedZNode, e);
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
// If the UNASSIGNED ZNode does not exist, create it.
|
||||
zkWrapper.createZNodeIfNotExists(zkWrapper.getRegionInTransitionZNode());
|
||||
zkWrapper.createZNodeIfNotExists(unassignedZNode);
|
||||
|
||||
// TODO: get the outstanding changes in UNASSIGNED
|
||||
|
||||
// Set a watch on Zookeeper's UNASSIGNED node if it exists.
|
||||
|
@ -66,7 +84,7 @@ public class ZKUnassignedWatcher implements Watcher {
|
|||
}
|
||||
|
||||
/**
|
||||
* This is the processing loop that gets triggerred from the ZooKeeperWrapper.
|
||||
* This is the processing loop that gets triggered from the ZooKeeperWrapper.
|
||||
* This zookeeper events process function does the following:
|
||||
* - WATCHES the following events: NodeCreated, NodeDataChanged, NodeChildrenChanged
|
||||
* - IGNORES the following events: None, NodeDeleted
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* Copyright 2010 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.executor.RegionTransitionEventData;
|
||||
import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestRestartCluster {
|
||||
private static final Log LOG = LogFactory.getLog(TestRestartCluster.class);
|
||||
private static Configuration conf;
|
||||
private static HBaseTestingUtility utility;
|
||||
private static ZooKeeperWrapper zkWrapper;
|
||||
private static final byte[] TABLENAME = Bytes.toBytes("master_transitions");
|
||||
private static final byte [][] FAMILIES = new byte [][] {Bytes.toBytes("a")};
|
||||
|
||||
@BeforeClass public static void beforeAllTests() throws Exception {
|
||||
conf = HBaseConfiguration.create();
|
||||
utility = new HBaseTestingUtility(conf);
|
||||
}
|
||||
|
||||
@AfterClass public static void afterAllTests() throws IOException {
|
||||
utility.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Before public void setup() throws IOException {
|
||||
}
|
||||
|
||||
@Test (timeout=300000) public void testRestartClusterAfterKill()throws Exception {
|
||||
utility.startMiniZKCluster();
|
||||
zkWrapper = ZooKeeperWrapper.createInstance(conf, "cluster1");
|
||||
|
||||
// create the unassigned region, throw up a region opened state for META
|
||||
String unassignedZNode = zkWrapper.getRegionInTransitionZNode();
|
||||
zkWrapper.createZNodeIfNotExists(unassignedZNode);
|
||||
byte[] data = null;
|
||||
HBaseEventType hbEventType = HBaseEventType.RS2ZK_REGION_OPENED;
|
||||
try {
|
||||
data = Writables.getBytes(new RegionTransitionEventData(hbEventType, HMaster.MASTER));
|
||||
} catch (IOException e) {
|
||||
LOG.error("Error creating event data for " + hbEventType, e);
|
||||
}
|
||||
zkWrapper.createUnassignedRegion(HRegionInfo.ROOT_REGIONINFO.getEncodedName(), data);
|
||||
zkWrapper.createUnassignedRegion(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), data);
|
||||
LOG.debug("Created UNASSIGNED zNode for ROOT and META regions in state " + HBaseEventType.M2ZK_REGION_OFFLINE);
|
||||
|
||||
// start the HB cluster
|
||||
LOG.info("Starting HBase cluster...");
|
||||
utility.startMiniCluster(2);
|
||||
|
||||
utility.createTable(TABLENAME, FAMILIES);
|
||||
LOG.info("Created a table, waiting for table to be available...");
|
||||
utility.waitTableAvailable(TABLENAME, 60*1000);
|
||||
|
||||
LOG.info("Master deleted unassgined region and started up successfully.");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue