HBASE-3789 Cleanup the locking contention in the master

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1136140 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jean-Daniel Cryans 2011-06-15 18:21:14 +00:00
parent 2aade76735
commit 0a585ffbc8
13 changed files with 189 additions and 261 deletions

View File

@ -263,6 +263,7 @@ Release 0.91.0 - Unreleased
HBASE-3982 Improvements to TestHFileSeek
HBASE-3940 HBase daemons should log version info at startup and possibly
periodically (Li Pi)
HBASE-3789 Cleanup the locking contention in the master
TASKS
HBASE-3559 Move report of split to master OFF the heartbeat channel

View File

@ -38,6 +38,7 @@ import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -70,7 +71,6 @@ import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Writable;
@ -446,10 +446,17 @@ public class AssignmentManager extends ZooKeeperListener {
if (!isInStateForSplitting(regionState)) break;
// If null, add SPLITTING state before going to SPLIT
if (regionState == null) {
LOG.info("Received SPLIT for region " + prettyPrintedRegionName +
" from server " + sn +
" but region was not first in SPLITTING state; continuing");
addSplittingToRIT(sn, encodedName);
regionState = addSplittingToRIT(sn, encodedName);
String message = "Received SPLIT for region " + prettyPrintedRegionName +
" from server " + sn;
// If still null, it means we cannot find it and it was already processed
if (regionState == null) {
LOG.warn(message + " but it doesn't exist anymore," +
" probably already processed its split");
break;
}
LOG.info(message +
" but region was not first in SPLITTING state; continuing");
}
// Check it has daughters.
byte [] payload = data.getPayload();
@ -589,8 +596,10 @@ public class AssignmentManager extends ZooKeeperListener {
RegionState regionState = null;
synchronized (this.regions) {
regionState = findHRegionInfoThenAddToRIT(serverName, encodedName);
regionState.update(RegionState.State.SPLITTING,
if (regionState != null) {
regionState.update(RegionState.State.SPLITTING,
System.currentTimeMillis(), serverName);
}
}
return regionState;
}
@ -689,16 +698,14 @@ public class AssignmentManager extends ZooKeeperListener {
@Override
public void nodeCreated(String path) {
if(path.startsWith(watcher.assignmentZNode)) {
synchronized(regionsInTransition) {
try {
RegionTransitionData data = ZKAssign.getData(watcher, path);
if(data == null) {
return;
}
handleRegion(data);
} catch (KeeperException e) {
master.abort("Unexpected ZK exception reading unassigned node data", e);
try {
RegionTransitionData data = ZKAssign.getData(watcher, path);
if (data == null) {
return;
}
handleRegion(data);
} catch (KeeperException e) {
master.abort("Unexpected ZK exception reading unassigned node data", e);
}
}
}
@ -718,16 +725,14 @@ public class AssignmentManager extends ZooKeeperListener {
@Override
public void nodeDataChanged(String path) {
if(path.startsWith(watcher.assignmentZNode)) {
synchronized(regionsInTransition) {
try {
RegionTransitionData data = ZKAssign.getData(watcher, path);
if (data == null) {
return;
}
handleRegion(data);
} catch (KeeperException e) {
master.abort("Unexpected ZK exception reading unassigned node data", e);
try {
RegionTransitionData data = ZKAssign.getData(watcher, path);
if (data == null) {
return;
}
handleRegion(data);
} catch (KeeperException e) {
master.abort("Unexpected ZK exception reading unassigned node data", e);
}
}
}
@ -762,23 +767,17 @@ public class AssignmentManager extends ZooKeeperListener {
* <ol>
* <li>Watch the node for further children changed events</li>
* <li>Watch all new children for changed events</li>
* <li>Read all children and handle them</li>
* </ol>
*/
@Override
public void nodeChildrenChanged(String path) {
if(path.equals(watcher.assignmentZNode)) {
synchronized(regionsInTransition) {
try {
List<NodeAndData> newNodes = ZKUtil.watchAndGetNewChildren(watcher,
watcher.assignmentZNode);
for(NodeAndData newNode : newNodes) {
LOG.debug("Handling new unassigned node: " + newNode);
handleRegion(RegionTransitionData.fromBytes(newNode.getData()));
}
} catch(KeeperException e) {
master.abort("Unexpected ZK exception reading unassigned children", e);
}
try {
// Just make sure we see the changes for the new znodes
ZKUtil.listChildrenAndWatchThem(watcher,
watcher.assignmentZNode);
} catch(KeeperException e) {
master.abort("Unexpected ZK exception reading unassigned children", e);
}
}
}
@ -843,9 +842,7 @@ public class AssignmentManager extends ZooKeeperListener {
rs = this.regionsInTransition.get(e.getKey());
}
if (rs == null) continue;
synchronized (rs) {
rs.updateTimestampToNow();
}
rs.updateTimestampToNow();
}
}
@ -1311,9 +1308,20 @@ public class AssignmentManager extends ZooKeeperListener {
synchronized (regionsInTransition) {
state = regionsInTransition.get(encodedName);
if (state == null) {
// Create the znode in CLOSING state
try {
ZKAssign.createNodeClosing(
master.getZooKeeper(), region, master.getServerName());
} catch (KeeperException e) {
master.abort("Unexpected ZK exception creating node CLOSING", e);
return;
}
state = new RegionState(region, RegionState.State.PENDING_CLOSE);
regionsInTransition.put(encodedName, state);
} else if (force && state.isPendingClose()) {
// JD 05/25/11
// in my experience this is useless, when this happens it just spins
LOG.debug("Attempting to unassign region " +
region.getRegionNameAsString() + " which is already pending close "
+ "but forcing an additional close");
@ -1893,9 +1901,7 @@ public class AssignmentManager extends ZooKeeperListener {
" has been CLOSED for too long, waiting on queued " +
"ClosedRegionHandler to run or server shutdown");
// Update our timestamp.
synchronized(regionState) {
regionState.update(regionState.getState());
}
regionState.updateTimestampToNow();
break;
case OFFLINE:
LOG.info("Region has been OFFLINE for too long, " +
@ -2320,10 +2326,13 @@ public class AssignmentManager extends ZooKeeperListener {
}
private State state;
private long stamp;
// Many threads can update the state at the stamp at the same time
private final AtomicLong stamp;
private ServerName serverName;
public RegionState() {}
public RegionState() {
this.stamp = new AtomicLong(System.currentTimeMillis());
}
RegionState(HRegionInfo region, State state) {
this(region, state, System.currentTimeMillis(), null);
@ -2332,24 +2341,28 @@ public class AssignmentManager extends ZooKeeperListener {
RegionState(HRegionInfo region, State state, long stamp, ServerName serverName) {
this.region = region;
this.state = state;
this.stamp = stamp;
this.stamp = new AtomicLong(stamp);
this.serverName = serverName;
}
public void update(State state, long stamp, ServerName serverName) {
this.state = state;
this.stamp = stamp;
updateTimestamp(stamp);
this.serverName = serverName;
}
public void update(State state) {
this.state = state;
this.stamp = System.currentTimeMillis();
updateTimestampToNow();
this.serverName = null;
}
public void updateTimestamp(long stamp) {
this.stamp.set(stamp);
}
public void updateTimestampToNow() {
this.stamp = System.currentTimeMillis();
this.stamp.set(System.currentTimeMillis());
}
public State getState() {
@ -2357,7 +2370,7 @@ public class AssignmentManager extends ZooKeeperListener {
}
public long getStamp() {
return stamp;
return stamp.get();
}
public HRegionInfo getRegion() {
@ -2413,14 +2426,14 @@ public class AssignmentManager extends ZooKeeperListener {
region = new HRegionInfo();
region.readFields(in);
state = State.valueOf(in.readUTF());
stamp = in.readLong();
stamp.set(in.readLong());
}
@Override
public void write(DataOutput out) throws IOException {
region.write(out);
out.writeUTF(state.name());
out.writeLong(stamp);
out.writeLong(stamp.get());
}
}

View File

@ -304,14 +304,36 @@ public class SplitTransaction {
// Tell master about split by updating zk. If we fail, abort.
if (server != null && server.getZooKeeper() != null) {
try {
transitionNodeSplit(server.getZooKeeper(), parent.getRegionInfo(),
a.getRegionInfo(), b.getRegionInfo(), server.getServerName(),
this.znodeVersion);
this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
server.getServerName(), this.znodeVersion);
int spins = 0;
// Now wait for the master to process the split. We know it's done
// when the znode is deleted. The reason we keep tickling the znode is
// that it's possible for the master to miss an event.
do {
if (spins % 10 == 0) {
LOG.info("Still waiting on the master to process the split for " +
this.parent.getRegionInfo().getEncodedName());
}
Thread.sleep(100);
// When this returns -1 it means the znode doesn't exist
this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
server.getServerName(), this.znodeVersion);
spins++;
} while (this.znodeVersion != -1);
} catch (Exception e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
server.abort("Failed telling master about split", e);
}
}
// Coprocessor callback
if (this.parent.getCoprocessorHost() != null) {
this.parent.getCoprocessorHost().postSplit(a,b);
@ -701,13 +723,10 @@ public class SplitTransaction {
RegionTransitionData data =
new RegionTransitionData(EventType.RS_ZK_REGION_SPLITTING,
region.getRegionName(), serverName);
// This synchronization is copied from ZKAssign.
synchronized(zkw.getNodes()) {
String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
zkw.getNodes().add(node);
if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, data.getBytes())) {
throw new IOException("Failed create of ephemeral " + node);
}
String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, data.getBytes())) {
throw new IOException("Failed create of ephemeral " + node);
}
// Transition node from SPLITTING to SPLITTING and pick up version so we
// can be sure this znode is ours; version is needed deleting.
@ -764,4 +783,14 @@ public class SplitTransaction {
return ZKAssign.transitionNode(zkw, parent, serverName,
EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
}
private static int tickleNodeSplit(ZooKeeperWatcher zkw,
HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
final int znodeVersion)
throws KeeperException, IOException {
byte [] payload = Writables.getBytes(a, b);
return ZKAssign.transitionNode(zkw, parent, serverName,
EventType.RS_ZK_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT,
znodeVersion, payload);
}
}

View File

@ -109,7 +109,7 @@ public class CloseRegionHandler extends EventHandler {
int expectedVersion = FAILED;
if (this.zk) {
expectedVersion = setClosingState();
expectedVersion = getCurrentVersion();
if (expectedVersion == FAILED) return;
}
@ -169,16 +169,16 @@ public class CloseRegionHandler extends EventHandler {
}
/**
* Create ZK node in CLOSING state.
* @return The expectedVersion. If -1, we failed setting CLOSING.
* Get the node's current version
* @return The expectedVersion. If -1, we failed getting the node
*/
private int setClosingState() {
private int getCurrentVersion() {
int expectedVersion = FAILED;
try {
if ((expectedVersion = ZKAssign.createNodeClosing(
server.getZooKeeper(), regionInfo, server.getServerName())) == FAILED) {
LOG.warn("Error creating node in CLOSING state, aborting close of " +
regionInfo.getRegionNameAsString());
if ((expectedVersion = ZKAssign.getVersion(
server.getZooKeeper(), regionInfo)) == FAILED) {
LOG.warn("Error getting node's version in CLOSING state," +
" aborting close of " + regionInfo.getRegionNameAsString());
}
} catch (KeeperException e) {
LOG.warn("Error creating node in CLOSING state, aborting close of " +

View File

@ -95,6 +95,7 @@ public class HBaseFsck {
// Empty regioninfo qualifiers in .META.
private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
private int numThreads = MAX_NUM_THREADS;
private final HBaseAdmin admin;
ThreadPoolExecutor executor; // threads to retrieve data from regionservers
@ -109,7 +110,7 @@ public class HBaseFsck {
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
this.conf = conf;
HBaseAdmin admin = new HBaseAdmin(conf);
admin = new HBaseAdmin(conf);
status = admin.getMaster().getClusterStatus();
connection = admin.getConnection();
@ -466,7 +467,7 @@ public class HBaseFsck {
if (shouldFix()) {
errors.print("Trying to fix unassigned region...");
setShouldRerun();
HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
HBaseFsckRepair.fixUnassigned(this.admin, hbi.metaEntry);
}
} else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, "Region "
@ -481,7 +482,7 @@ public class HBaseFsck {
if (shouldFix()) {
errors.print("Trying to fix assignment error...");
setShouldRerun();
HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
HBaseFsckRepair.fixDupeAssignment(this.admin, hbi.metaEntry, hbi.deployedOn);
}
} else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
@ -492,7 +493,7 @@ public class HBaseFsck {
if (shouldFix()) {
errors.print("Trying to fix assignment error...");
setShouldRerun();
HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
HBaseFsckRepair.fixDupeAssignment(this.admin, hbi.metaEntry, hbi.deployedOn);
}
} else {
errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
@ -735,7 +736,7 @@ public class HBaseFsck {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try to fix it (treat it as unassigned region)
HBaseFsckRepair.fixUnassigned(conf, root.metaEntry);
HBaseFsckRepair.fixUnassigned(this.admin, root.metaEntry);
}
}
// If there are more than one regions pretending to hold the .META.
@ -749,7 +750,7 @@ public class HBaseFsck {
for (HbckInfo mRegion : metaRegions) {
deployedOn.add(mRegion.metaEntry.regionServer);
}
HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn);
HBaseFsckRepair.fixDupeAssignment(this.admin, root.metaEntry, deployedOn);
}
}
// rerun hbck with hopefully fixed META

View File

@ -23,16 +23,14 @@ import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.zookeeper.KeeperException;
public class HBaseFsckRepair {
@ -41,14 +39,14 @@ public class HBaseFsckRepair {
* Fix dupe assignment by doing silent closes on each RS hosting the region
* and then force ZK unassigned node to OFFLINE to trigger assignment by
* master.
* @param conf
* @param admin
* @param region
* @param servers
* @throws IOException
* @throws KeeperException
* @throws InterruptedException
*/
public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
public static void fixDupeAssignment(HBaseAdmin admin, HRegionInfo region,
List<ServerName> servers)
throws IOException, KeeperException, InterruptedException {
@ -56,45 +54,33 @@ public class HBaseFsckRepair {
// Close region on the servers silently
for(ServerName server : servers) {
closeRegionSilentlyAndWait(conf, server, actualRegion);
closeRegionSilentlyAndWait(admin.getConfiguration(), server, actualRegion);
}
// Force ZK node to OFFLINE so master assigns
forceOfflineInZK(conf, actualRegion);
forceOfflineInZK(admin, actualRegion);
}
/**
* Fix unassigned by creating/transition the unassigned ZK node for this
* region to OFFLINE state with a special flag to tell the master that this
* is a forced operation by HBCK.
* @param conf
* @param admin
* @param region
* @throws IOException
* @throws KeeperException
*/
public static void fixUnassigned(Configuration conf, HRegionInfo region)
public static void fixUnassigned(HBaseAdmin admin, HRegionInfo region)
throws IOException, KeeperException {
HRegionInfo actualRegion = new HRegionInfo(region);
// Force ZK node to OFFLINE so master assigns
forceOfflineInZK(conf, actualRegion);
forceOfflineInZK(admin, actualRegion);
}
private static void forceOfflineInZK(Configuration conf, final HRegionInfo region)
private static void forceOfflineInZK(HBaseAdmin admin, final HRegionInfo region)
throws ZooKeeperConnectionException, KeeperException, IOException {
HConnectionManager.execute(new HConnectable<Void>(conf) {
@Override
public Void connect(HConnection connection) throws IOException {
try {
ZKAssign.createOrForceNodeOffline(
connection.getZooKeeperWatcher(),
region, HConstants.HBCK_CODE_SERVERNAME);
} catch (KeeperException ke) {
throw new IOException(ke);
}
return null;
}
});
admin.assign(region.getRegionName(), true);
}
private static void closeRegionSilentlyAndWait(Configuration conf,

View File

@ -66,16 +66,16 @@ public class RegionServerTracker extends ZooKeeperListener {
*/
public void start() throws KeeperException, IOException {
watcher.registerListener(this);
List<NodeAndData> servers =
ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode);
List<String> servers =
ZKUtil.listChildrenAndWatchThem(watcher, watcher.rsZNode);
add(servers);
}
private void add(final List<NodeAndData> servers) throws IOException {
private void add(final List<String> servers) throws IOException {
synchronized(this.regionServers) {
this.regionServers.clear();
for (NodeAndData n: servers) {
ServerName sn = new ServerName(ZKUtil.getNodeName(n.getNode()));
for (String n: servers) {
ServerName sn = new ServerName(ZKUtil.getNodeName(n));
this.regionServers.add(sn);
}
}
@ -107,8 +107,8 @@ public class RegionServerTracker extends ZooKeeperListener {
public void nodeChildrenChanged(String path) {
if (path.equals(watcher.rsZNode)) {
try {
List<NodeAndData> servers =
ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode);
List<String> servers =
ZKUtil.listChildrenAndWatchThem(watcher, watcher.rsZNode);
add(servers);
} catch (IOException e) {
abortable.abort("Unexpected zk exception getting RS nodes", e);

View File

@ -143,11 +143,8 @@ public class ZKAssign {
region.getEncodedName() + " in OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(event,
region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
String node = getNodeName(zkw, region.getEncodedName());
zkw.getNodes().add(node);
ZKUtil.createAndWatch(zkw, node, data.getBytes());
}
String node = getNodeName(zkw, region.getEncodedName());
ZKUtil.createAndWatch(zkw, node, data.getBytes());
}
/**
@ -173,11 +170,8 @@ public class ZKAssign {
region.getEncodedName() + " with OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
String node = getNodeName(zkw, region.getEncodedName());
zkw.getNodes().add(node);
ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx);
}
String node = getNodeName(zkw, region.getEncodedName());
ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx);
}
/**
@ -205,11 +199,8 @@ public class ZKAssign {
region.getEncodedName() + " to OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
String node = getNodeName(zkw, region.getEncodedName());
zkw.getNodes().add(node);
ZKUtil.setData(zkw, node, data.getBytes());
}
String node = getNodeName(zkw, region.getEncodedName());
ZKUtil.setData(zkw, node, data.getBytes());
}
@ -238,25 +229,22 @@ public class ZKAssign {
region.getEncodedName() + " with OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
String node = getNodeName(zkw, region.getEncodedName());
zkw.sync(node);
zkw.getNodes().add(node);
int version = ZKUtil.checkExists(zkw, node);
if(version == -1) {
ZKUtil.createAndWatch(zkw, node, data.getBytes());
String node = getNodeName(zkw, region.getEncodedName());
zkw.sync(node);
int version = ZKUtil.checkExists(zkw, node);
if (version == -1) {
ZKUtil.createAndWatch(zkw, node, data.getBytes());
} else {
if (!ZKUtil.setData(zkw, node, data.getBytes(), version)) {
return false;
} else {
if (!ZKUtil.setData(zkw, node, data.getBytes(), version)) {
return false;
} else {
// We successfully forced to OFFLINE, reset watch and handle if
// the state changed in between our set and the watch
RegionTransitionData curData =
// We successfully forced to OFFLINE, reset watch and handle if
// the state changed in between our set and the watch
RegionTransitionData curData =
ZKAssign.getData(zkw, region.getEncodedName());
if (curData.getEventType() != data.getEventType()) {
// state changed, need to process
return false;
}
if (curData.getEventType() != data.getEventType()) {
// state changed, need to process
return false;
}
}
}
@ -408,19 +396,15 @@ public class ZKAssign {
" state but node is in " + data.getEventType() + " state"));
return false;
}
synchronized(zkw.getNodes()) {
// TODO: Does this go here or only if we successfully delete node?
zkw.getNodes().remove(node);
if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
LOG.warn(zkw.prefix("Attempting to delete " +
if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
LOG.warn(zkw.prefix("Attempting to delete " +
"unassigned node in " + expectedState +
" state but after verifying state, we got a version mismatch"));
return false;
}
LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
regionName + " in expected state " + expectedState));
return true;
" state but after verifying state, we got a version mismatch"));
return false;
}
LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
regionName + " in expected state " + expectedState));
return true;
}
/**
@ -473,11 +457,8 @@ public class ZKAssign {
RegionTransitionData data = new RegionTransitionData(
EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName);
synchronized (zkw.getNodes()) {
String node = getNodeName(zkw, region.getEncodedName());
zkw.getNodes().add(node);
return ZKUtil.createAndWatch(zkw, node, data.getBytes());
}
String node = getNodeName(zkw, region.getEncodedName());
return ZKUtil.createAndWatch(zkw, node, data.getBytes());
}
/**
@ -782,6 +763,19 @@ public class ZKAssign {
return RegionTransitionData.fromBytes(data);
}
/**
* Get the version of the specified znode
* @param zkw zk reference
* @param region region's info
* @return the version of the znode, -1 if it doesn't exist
* @throws KeeperException
*/
public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
throws KeeperException {
String znode = getNodeName(zkw, region.getEncodedName());
return ZKUtil.checkExists(zkw, znode);
}
/**
* Delete the assignment node regardless of its current state.
* <p>

View File

@ -348,30 +348,6 @@ public class ZKUtil {
return children;
}
/**
* Atomically add watches and read data from all unwatched unassigned nodes.
*
* <p>This works because master is the only person deleting nodes.
*/
public static List<NodeAndData> watchAndGetNewChildren(ZooKeeperWatcher zkw,
String baseNode)
throws KeeperException {
List<NodeAndData> newNodes = new ArrayList<NodeAndData>();
synchronized(zkw.getNodes()) {
List<String> nodes =
ZKUtil.listChildrenAndWatchForNewChildren(zkw, baseNode);
for(String node : nodes) {
String nodePath = ZKUtil.joinZNode(baseNode, node);
if(!zkw.getNodes().contains(nodePath)) {
byte [] data = ZKUtil.getDataAndWatch(zkw, nodePath);
newNodes.add(new NodeAndData(nodePath, data));
zkw.getNodes().add(nodePath);
}
}
}
return newNodes;
}
/**
* Simple class to hold a node path and node data.
*/

View File

@ -20,7 +20,6 @@
package org.apache.hadoop.hbase.zookeeper;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
@ -68,9 +67,6 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
private final List<ZooKeeperListener> listeners =
new CopyOnWriteArrayList<ZooKeeperListener>();
// set of unassigned nodes watched
private Set<String> unassignedNodes = new HashSet<String>();
// node names
// base znode for this cluster
@ -372,14 +368,6 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
this.zooKeeper.sync(path, null, null);
}
/**
* Get the set of already watched unassigned nodes.
* @return Set of Nodes.
*/
public Set<String> getNodes() {
return unassignedNodes;
}
/**
* Handles KeeperExceptions in client calls.
* <p>

View File

@ -97,13 +97,6 @@ public class MiniHBaseCluster {
this.user = User.getCurrent();
}
@Override
public boolean closeRegion(HRegionInfo region)
throws IOException {
if (TEST_SKIP_CLOSE) return true;
return super.closeRegion(region);
}
/*
* @param c
* @param currentfs We return this if we did not make a new one.

View File

@ -88,69 +88,6 @@ public class TestSplitTransactionOnCluster {
return regions.get(0).getRegionInfo();
}
/**
* Test what happens if master goes to balance a region just as regionserver
* goes to split it. The PENDING_CLOSE state is the strange one since its
* in the Master's head only, not out in zk. Test this case.
* @throws IOException
* @throws InterruptedException
* @throws NodeExistsException
* @throws KeeperException
*/
@Test (timeout = 600000) public void testPendingCloseAndSplit()
throws IOException, InterruptedException, NodeExistsException, KeeperException {
final byte [] tableName = Bytes.toBytes("pendingCloseAndSplit");
// Create table then get the single region for our new table.
HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
List<HRegion> regions = cluster.getRegions(tableName);
HRegionInfo hri = getAndCheckSingleTableRegion(regions);
int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
// Turn off balancer so it doesn't cut in and mess up our placements.
this.admin.balanceSwitch(false);
// Turn off the meta scanner so it don't remove parent on us.
this.cluster.getMaster().setCatalogJanitorEnabled(false);
try {
// Add a bit of load up into the table so splittable.
TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
// Get region pre-split.
HRegionServer server = cluster.getRegionServer(tableRegionIndex);
printOutRegions(server, "Initial regions: ");
int regionCount = server.getOnlineRegions().size();
// Now send in a close of a region but first make the close on the regionserver
// a NOOP. This way the master has all the state of it going to close
// but then a SPLITTING arrives. This is what we want to test.
// Here is how we turn CLOSE into NOOP in test.
MiniHBaseCluster.MiniHBaseClusterRegionServer.TEST_SKIP_CLOSE = true;
this.cluster.getMaster().unassign(hri.getRegionName(), false);
// Now try splitting and it should work.
LOG.info("Running split on server " + server.toString());
split(hri, server, regionCount);
// Get daughters
List<HRegion> daughters = this.cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
// Assert the ephemeral node is gone in zk.
String path = ZKAssign.getNodeName(t.getConnection().getZooKeeperWatcher(),
hri.getEncodedName());
Stat stat = null;
for (int i = 0; i < 10; i++) {
stat = t.getConnection().getZooKeeperWatcher().getZooKeeper().exists(path, false);
LOG.info("Stat for znode path=" + path + ": " + stat);
if (stat == null) break;
org.apache.hadoop.hbase.util.Threads.sleep(100);
}
assertTrue(stat == null);
} finally {
// Set this flag back.
MiniHBaseCluster.MiniHBaseClusterRegionServer.TEST_SKIP_CLOSE = false;
admin.balanceSwitch(true);
cluster.getMaster().setCatalogJanitorEnabled(true);
}
}
/**
* A test that intentionally has master fail the processing of the split message.
* Tests that the regionserver split ephemeral node gets cleaned up if it

View File

@ -20,6 +20,7 @@
package org.apache.hadoop.hbase.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.ArrayList;
@ -37,7 +38,9 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
@ -168,7 +171,14 @@ public class TestHBaseFsck {
Thread.sleep(1 * 1000);
ArrayList servers = new ArrayList();
servers.add(rsAddressOrig);
HBaseFsckRepair.fixDupeAssignment(conf, hriOrig, servers);
try {
HBaseFsckRepair.fixDupeAssignment(TEST_UTIL.getHBaseAdmin(), hriOrig, servers);
} catch (IOException ex) {
ex = RemoteExceptionHandler.checkIOException(ex);
if (!(ex instanceof UnknownRegionException)) {
fail("Unexpected exception: " + ex);
}
}
// We created 1 table, should be fine
assertNoErrors(doFsck(false));