();
- splitRegionHandlerCalled = new AtomicBoolean(false);
}
void updateClosedRegionHandlerTracker(HRegionInfo hri) {
@@ -1082,36 +996,6 @@ public class AssignmentManager extends ZooKeeperListener {
}
}
- void updateSplitHandlerTracker() {
- if (splitRegionHandlerCalled != null) { //only for unit tests this is true
- splitRegionHandlerCalled.set(true);
- }
- }
-
- /**
- * @return Returns true if this RegionState is splittable; i.e. the
- * RegionState is currently in splitting state or pending_close or
- * null (Anything else will return false). (Anything else will return false).
- */
- private boolean isInStateForSplitting(final RegionState rs) {
- if (rs == null) return true;
- if (rs.isSplitting()) return true;
- if (convertPendingCloseToSplitting(rs)) return true;
- LOG.warn("Dropped region split! Not in state good for SPLITTING; rs=" + rs);
- return false;
- }
-
- /**
- * @return Returns true if both regions are merging/open on specified server
- */
- private boolean isInStateForMerging(final ServerName sn,
- final HRegionInfo a, final HRegionInfo b) {
- RegionState rs_a = regionStates.getRegionState(a);
- RegionState rs_b = regionStates.getRegionState(b);
- return ((rs_a == null || rs_a.isOpenOrMergingOnServer(sn))
- && (rs_b == null || rs_b.isOpenOrMergingOnServer(sn)));
- }
-
// TODO: processFavoredNodes might throw an exception, for e.g., if the
// meta could not be contacted/updated. We need to see how seriously to treat
// this problem as. Should we fail the current assignment. We should be able
@@ -1130,25 +1014,6 @@ public class AssignmentManager extends ZooKeeperListener {
FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, catalogTracker);
}
- /**
- * If the passed regionState is in PENDING_CLOSE, clean up PENDING_CLOSE
- * state and convert it to SPLITTING instead.
- * This can happen in case where master wants to close a region at same time
- * a regionserver starts a split. The split won. Clean out old PENDING_CLOSE
- * state.
- * @param rs
- * @return True if we converted from PENDING_CLOSE to SPLITTING
- */
- private boolean convertPendingCloseToSplitting(final RegionState rs) {
- if (!rs.isPendingClose()) return false;
- LOG.debug("Converting PENDING_CLOSE to SPLITTING; rs=" + rs);
- regionStates.updateRegionState(rs.getRegion(), State.SPLITTING);
- // Clean up existing state. Clear from region plans seems all we
- // have to do here by way of clean up of PENDING_CLOSE.
- clearRegionPlan(rs.getRegion());
- return true;
- }
-
/**
* Handle a ZK unassigned node transition triggered by HBCK repair tool.
*
@@ -1305,38 +1170,56 @@ public class AssignmentManager extends ZooKeeperListener {
Lock lock = locker.acquireLock(regionName);
try {
RegionState rs = regionStates.getRegionTransitionState(regionName);
- if (rs == null) return;
+ if (rs == null) {
+ rs = regionStates.getRegionState(regionName);
+ if (rs == null || !rs.isMergingNew()) {
+ // MergingNew is an offline state
+ return;
+ }
+ }
HRegionInfo regionInfo = rs.getRegion();
String regionNameStr = regionInfo.getRegionNameAsString();
LOG.debug("Znode " + regionNameStr + " deleted, state: " + rs);
- if (rs.isOpened()) {
- ServerName serverName = rs.getServerName();
- regionOnline(regionInfo, serverName);
- boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
- if (!serverManager.isServerOnline(serverName) && !disabled) {
- LOG.info("Opened " + regionNameStr
- + "but the region server is offline, reassign the region");
- assign(regionInfo, true);
- } else if (disabled) {
- // if server is offline, no hurt to unassign again
- LOG.info("Opened " + regionNameStr
- + "but this table is disabled, triggering close of region");
- unassign(regionInfo);
+ boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
+ ServerName serverName = rs.getServerName();
+ if (serverManager.isServerOnline(serverName)) {
+ if (rs.isOnServer(serverName)
+ && (rs.isOpened() || rs.isSplitting())) {
+ regionOnline(regionInfo, serverName);
+ if (disabled) {
+ // if server is offline, no hurt to unassign again
+ LOG.info("Opened " + regionNameStr
+ + "but this table is disabled, triggering close of region");
+ unassign(regionInfo);
+ }
+ } else if (rs.isMergingNew()) {
+ synchronized (regionStates) {
+ String p = regionInfo.getEncodedName();
+ PairOfSameType regions = mergingRegions.get(p);
+ if (regions != null) {
+ onlineMergingRegion(disabled, regions.getFirst(), serverName);
+ onlineMergingRegion(disabled, regions.getSecond(), serverName);
+ }
+ }
}
- } else if (rs.isSplitting()) {
- LOG.debug("Ephemeral node deleted. Found in SPLITTING state. " + "Removing from RIT "
- + rs.getRegion());
- // it can be either SPLIT fail, or RS dead.
- regionStates.regionOnline(rs.getRegion(), rs.getServerName());
}
- // RS does not delete the znode in case SPLIT, it only means RS died which
- // will be handled by SSH
- // in region merge we do not put merging regions to MERGING state
} finally {
lock.unlock();
}
}
+
+ private void onlineMergingRegion(boolean disabled,
+ final HRegionInfo hri, final ServerName serverName) {
+ RegionState regionState = regionStates.getRegionState(hri);
+ if (regionState != null && regionState.isMerging()
+ && regionState.isOnServer(serverName)) {
+ regionOnline(regionState.getRegion(), serverName);
+ if (disabled) {
+ unassign(hri);
+ }
+ }
+ }
});
}
}
@@ -1371,23 +1254,7 @@ public class AssignmentManager extends ZooKeeperListener {
// on it, so no need to watch it again. So, as I know for now,
// this is needed to watch splitting nodes only.
if (!regionStates.isRegionInTransition(child)) {
- stat.setVersion(0);
- byte[] data = ZKAssign.getDataAndWatch(watcher,
- ZKUtil.joinZNode(watcher.assignmentZNode, child), stat);
- if (data != null && stat.getVersion() > 0) {
- try {
- RegionTransition rt = RegionTransition.parseFrom(data);
-
- //See HBASE-7551, handle splitting too, in case we miss the node change event
- EventType type = rt.getEventType();
- if (type == EventType.RS_ZK_REGION_SPLITTING
- || type == EventType.RS_ZK_REGION_MERGING) {
- handleRegion(rt, stat.getVersion());
- }
- } catch (DeserializationException de) {
- LOG.error("error getting data for " + child, de);
- }
- }
+ ZKAssign.getDataAndWatch(watcher, child, stat);
}
}
}
@@ -2374,7 +2241,8 @@ public class AssignmentManager extends ZooKeeperListener {
*/
public void unassign(HRegionInfo region, boolean force, ServerName dest) {
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
- LOG.debug("Starting unassign of " + region.getRegionNameAsString() + " (offlining)");
+ LOG.debug("Starting unassign of " + region.getRegionNameAsString()
+ + " (offlining), current state: " + regionStates.getRegionState(region));
String encodedName = region.getEncodedName();
// Grab the state of this region and synchronize on it
@@ -2389,8 +2257,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Region is not in transition.
// We can unassign it only if it's not SPLIT/MERGED.
state = regionStates.getRegionState(encodedName);
- if (state != null && (state.isMerged()
- || state.isSplit() || state.isOffline())) {
+ if (state != null && state.isNotUnassignableNotInTransition()) {
LOG.info("Attempting to unassign " + state + ", ignored");
// Offline region will be reassigned below
return;
@@ -2484,27 +2351,9 @@ public class AssignmentManager extends ZooKeeperListener {
* @param region regioninfo of znode to be deleted.
*/
public void deleteClosingOrClosedNode(HRegionInfo region) {
- String encodedName = region.getEncodedName();
- try {
- if (!ZKAssign.deleteNode(watcher, encodedName,
- EventType.M_ZK_REGION_CLOSING)) {
- boolean deleteNode = ZKAssign.deleteNode(watcher,
- encodedName, EventType.RS_ZK_REGION_CLOSED);
- // TODO : We don't abort if the delete node returns false. Is there any
- // such corner case?
- if (!deleteNode) {
- LOG.error("The deletion of the CLOSED node for "
- + encodedName + " returned " + deleteNode);
- }
- }
- } catch (NoNodeException e) {
- LOG.debug("CLOSING/CLOSED node for " + encodedName
- + " already deleted");
- } catch (KeeperException ke) {
- server.abort(
- "Unexpected ZK exception deleting node CLOSING/CLOSED for the region "
- + encodedName, ke);
- }
+ String regionName = region.getEncodedName();
+ deleteNodeInStates(regionName, "closing", EventType.M_ZK_REGION_CLOSING,
+ EventType.RS_ZK_REGION_CLOSED);
}
/**
@@ -2519,16 +2368,22 @@ public class AssignmentManager extends ZooKeeperListener {
// This may fail if the SPLIT or SPLITTING or MERGED or MERGING znode gets
// cleaned up before we can get data from it.
byte [] data = ZKAssign.getData(watcher, path);
- if (data == null) return false;
+ if (data == null) {
+ LOG.info("Node " + path + " is gone");
+ return false;
+ }
RegionTransition rt = RegionTransition.parseFrom(data);
switch (rt.getEventType()) {
+ case RS_ZK_REQUEST_REGION_SPLIT:
case RS_ZK_REGION_SPLIT:
case RS_ZK_REGION_SPLITTING:
+ case RS_ZK_REQUEST_REGION_MERGE:
case RS_ZK_REGION_MERGED:
case RS_ZK_REGION_MERGING:
result = true;
break;
default:
+ LOG.info("Node " + path + " is in " + rt.getEventType());
break;
}
return result;
@@ -2818,6 +2673,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Region is being served and on an active server
// add only if region not in disabled or enabling table
if (!disabledOrEnablingTables.contains(tableName)) {
+ regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation);
regionStates.regionOnline(regionInfo, regionLocation);
}
// need to enable the table if not disabled or disabling or enabling
@@ -3275,8 +3131,9 @@ public class AssignmentManager extends ZooKeeperListener {
server.abort("Unexpected ZK exception deleting node " + hri, ke);
}
if (zkTable.isDisablingOrDisabledTable(hri.getTable())) {
- it.remove();
+ regionStates.updateRegionState(hri, State.OFFLINE);
regionStates.regionOffline(hri);
+ it.remove();
continue;
}
// Mark the region offline and assign it again by SSH
@@ -3289,38 +3146,6 @@ public class AssignmentManager extends ZooKeeperListener {
return regions;
}
- /**
- * Update inmemory structures.
- * @param sn Server that reported the split
- * @param parent Parent region that was split
- * @param a Daughter region A
- * @param b Daughter region B
- */
- public void handleSplitReport(final ServerName sn, final HRegionInfo parent,
- final HRegionInfo a, final HRegionInfo b) {
- synchronized (regionStates) {
- regionOffline(parent, State.SPLIT);
- onlineNewRegion(a, sn);
- onlineNewRegion(b, sn);
- }
- }
-
- /**
- * Update inmemory structures.
- * @param sn Server that reported the merge
- * @param merged regioninfo of merged
- * @param a region a
- * @param b region b
- */
- public void handleRegionsMergeReport(final ServerName sn,
- final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) {
- synchronized (regionStates) {
- regionOffline(a, State.MERGED);
- regionOffline(b, State.MERGED);
- onlineNewRegion(merged, sn);
- }
- }
-
/**
* @param plan Plan to execute.
*/
@@ -3397,33 +3222,283 @@ public class AssignmentManager extends ZooKeeperListener {
return true;
}
+ private boolean deleteNodeInStates(
+ String regionName, String desc, EventType... types) {
+ try {
+ for (EventType et: types) {
+ if (ZKAssign.deleteNode(watcher, regionName, et)) {
+ return true;
+ }
+ }
+ LOG.info("Failed to delete the " + desc + " node for "
+ + regionName + ". The node type may not match");
+ } catch (NoNodeException e) {
+ LOG.debug("The " + desc + " node for " + regionName + " already deleted");
+ } catch (KeeperException ke) {
+ server.abort("Unexpected ZK exception deleting " + desc
+ + " node for the region " + regionName, ke);
+ }
+ return false;
+ }
+
+ private void deleteMergingNode(String encodedName) {
+ deleteNodeInStates(encodedName, "merging", EventType.RS_ZK_REGION_MERGING,
+ EventType.RS_ZK_REQUEST_REGION_MERGE, EventType.RS_ZK_REGION_MERGED);
+ }
+
+ private void deleteSplittingNode(String encodedName) {
+ deleteNodeInStates(encodedName, "splitting", EventType.RS_ZK_REGION_SPLITTING,
+ EventType.RS_ZK_REQUEST_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT);
+ }
+
/**
* A helper to handle region merging transition event.
* It transitions merging regions to MERGING state.
*/
- private boolean handleRegionMerging(final RegionTransition rt,
+ private boolean handleRegionMerging(final RegionTransition rt, final String encodedName,
final String prettyPrintedRegionName, final ServerName sn) {
+ if (!serverManager.isServerOnline(sn)) {
+ LOG.warn("Dropped merging! ServerName=" + sn + " unknown.");
+ return false;
+ }
byte [] payloadOfMerging = rt.getPayload();
List mergingRegions;
try {
mergingRegions = HRegionInfo.parseDelimitedFrom(
payloadOfMerging, 0, payloadOfMerging.length);
} catch (IOException e) {
- LOG.error("Dropped merging! Failed reading merging payload for "
- + prettyPrintedRegionName);
+ LOG.error("Dropped merging! Failed reading " + rt.getEventType()
+ + " payload for " + prettyPrintedRegionName);
return false;
}
- assert mergingRegions.size() == 2;
- HRegionInfo merging_a = mergingRegions.get(0);
- HRegionInfo merging_b = mergingRegions.get(1);
+ assert mergingRegions.size() == 3;
+ HRegionInfo p = mergingRegions.get(0);
+ HRegionInfo hri_a = mergingRegions.get(1);
+ HRegionInfo hri_b = mergingRegions.get(2);
- if (!isInStateForMerging(sn, merging_a, merging_b)) {
- LOG.warn("Dropped merging! Not in state good for MERGING; rs_a="
- + merging_a + ", rs_b=" + merging_b);
+ RegionState rs_p = regionStates.getRegionState(p);
+ RegionState rs_a = regionStates.getRegionState(hri_a);
+ RegionState rs_b = regionStates.getRegionState(hri_b);
+
+ if (!((rs_a == null || rs_a.isOpenOrMergingOnServer(sn))
+ && (rs_b == null || rs_b.isOpenOrMergingOnServer(sn))
+ && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
+ LOG.warn("Dropped merging! Not in state good for MERGING; rs_p="
+ + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
return false;
}
- regionStates.updateRegionState(merging_a, State.MERGING);
- regionStates.updateRegionState(merging_b, State.MERGING);
+
+ EventType et = rt.getEventType();
+ if (et == EventType.RS_ZK_REQUEST_REGION_MERGE) {
+ try {
+ if (RegionMergeTransaction.transitionMergingNode(watcher, p,
+ hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_MERGE,
+ EventType.RS_ZK_REGION_MERGING) == -1) {
+ byte[] data = ZKAssign.getData(watcher, encodedName);
+ EventType currentType = null;
+ if (data != null) {
+ RegionTransition newRt = RegionTransition.parseFrom(data);
+ currentType = newRt.getEventType();
+ }
+ if (currentType == null || (currentType != EventType.RS_ZK_REGION_MERGED
+ && currentType != EventType.RS_ZK_REGION_MERGING)) {
+ LOG.warn("Failed to transition pending_merge node "
+ + encodedName + " to merging, it's now " + currentType);
+ return false;
+ }
+ }
+ } catch (Exception e) {
+ LOG.warn("Failed to transition pending_merge node "
+ + encodedName + " to merging", e);
+ return false;
+ }
+ }
+
+ synchronized (regionStates) {
+ if (regionStates.getRegionState(p) == null) {
+ regionStates.createRegionState(p);
+ }
+ regionStates.updateRegionState(hri_a, State.MERGING);
+ regionStates.updateRegionState(hri_b, State.MERGING);
+
+ if (et != EventType.RS_ZK_REGION_MERGED) {
+ regionStates.updateRegionState(p, State.MERGING_NEW, sn);;
+ regionStates.regionOffline(p, State.MERGING_NEW);
+ this.mergingRegions.put(encodedName,
+ new PairOfSameType(hri_a, hri_b));
+ } else {
+ this.mergingRegions.remove(encodedName);
+ regionStates.updateRegionState(hri_a, State.MERGED);
+ regionStates.updateRegionState(hri_b, State.MERGED);
+ regionOffline(hri_a, State.MERGED);
+ regionOffline(hri_b, State.MERGED);
+ regionOnline(p, sn);
+ }
+ }
+
+ if (et == EventType.RS_ZK_REGION_MERGED) {
+ LOG.debug("Handling MERGED event for " + encodedName + "; deleting node");
+ // Remove region from ZK
+ try {
+ boolean successful = false;
+ while (!successful) {
+ // It's possible that the RS tickles in between the reading of the
+ // znode and the deleting, so it's safe to retry.
+ successful = ZKAssign.deleteNode(
+ watcher, encodedName, EventType.RS_ZK_REGION_MERGED);
+ }
+ } catch (KeeperException e) {
+ if (e instanceof NoNodeException) {
+ String znodePath = ZKUtil.joinZNode(watcher.splitLogZNode, encodedName);
+ LOG.debug("The znode " + znodePath + " does not exist. May be deleted already.");
+ } else {
+ server.abort("Error deleting MERGED node " + encodedName, e);
+ }
+ }
+ LOG.info("Handled MERGED event; merged=" + p.getRegionNameAsString()
+ + ", region_a=" + hri_a.getRegionNameAsString() + ", region_b="
+ + hri_b.getRegionNameAsString() + ", on " + sn);
+
+ // User could disable the table before master knows the new region.
+ if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
+ unassign(p);
+ }
+ }
+ return true;
+ }
+
+ /**
+ * A helper to handle region splitting transition event.
+ */
+ private boolean handleRegionSplitting(final RegionTransition rt, final String encodedName,
+ final String prettyPrintedRegionName, final ServerName sn) {
+ if (!serverManager.isServerOnline(sn)) {
+ LOG.warn("Dropped splitting! ServerName=" + sn + " unknown.");
+ return false;
+ }
+ byte [] payloadOfSplitting = rt.getPayload();
+ List splittingRegions;
+ try {
+ splittingRegions = HRegionInfo.parseDelimitedFrom(
+ payloadOfSplitting, 0, payloadOfSplitting.length);
+ } catch (IOException e) {
+ LOG.error("Dropped splitting! Failed reading " + rt.getEventType()
+ + " payload for " + prettyPrintedRegionName);
+ return false;
+ }
+ assert splittingRegions.size() == 2;
+ HRegionInfo hri_a = splittingRegions.get(0);
+ HRegionInfo hri_b = splittingRegions.get(1);
+
+ RegionState rs_p = regionStates.getRegionState(encodedName);
+ RegionState rs_a = regionStates.getRegionState(hri_a);
+ RegionState rs_b = regionStates.getRegionState(hri_b);
+
+ if (!((rs_p == null || rs_p.isOpenOrSplittingOnServer(sn))
+ && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
+ && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
+ LOG.warn("Dropped splitting! Not in state good for SPLITTING; rs_p="
+ + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
+ return false;
+ }
+
+ if (rs_p == null) {
+ // Splitting region should be online
+ rs_p = regionStates.updateRegionState(rt, State.OPEN);
+ if (rs_p == null) {
+ LOG.warn("Received splitting for region " + prettyPrintedRegionName
+ + " from server " + sn + " but it doesn't exist anymore,"
+ + " probably already processed its split");
+ return false;
+ }
+ regionStates.regionOnline(rs_p.getRegion(), sn);
+ }
+
+ HRegionInfo p = rs_p.getRegion();
+ EventType et = rt.getEventType();
+ if (et == EventType.RS_ZK_REQUEST_REGION_SPLIT) {
+ try {
+ if (SplitTransaction.transitionSplittingNode(watcher, p,
+ hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_SPLIT,
+ EventType.RS_ZK_REGION_SPLITTING) == -1) {
+ byte[] data = ZKAssign.getData(watcher, encodedName);
+ EventType currentType = null;
+ if (data != null) {
+ RegionTransition newRt = RegionTransition.parseFrom(data);
+ currentType = newRt.getEventType();
+ }
+ if (currentType == null || (currentType != EventType.RS_ZK_REGION_SPLIT
+ && currentType != EventType.RS_ZK_REGION_SPLITTING)) {
+ LOG.warn("Failed to transition pending_split node "
+ + encodedName + " to splitting, it's now " + currentType);
+ return false;
+ }
+ }
+ } catch (Exception e) {
+ LOG.warn("Failed to transition pending_split node "
+ + encodedName + " to splitting", e);
+ return false;
+ }
+ }
+
+ synchronized (regionStates) {
+ if (regionStates.getRegionState(hri_a) == null) {
+ regionStates.createRegionState(hri_a);
+ }
+ if (regionStates.getRegionState(hri_b) == null) {
+ regionStates.createRegionState(hri_b);
+ }
+
+ regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
+ regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
+ regionStates.regionOffline(hri_a, State.SPLITTING_NEW);
+ regionStates.regionOffline(hri_b, State.SPLITTING_NEW);
+ regionStates.updateRegionState(rt, State.SPLITTING);
+
+ // The below is for testing ONLY! We can't do fault injection easily, so
+ // resort to this kinda uglyness -- St.Ack 02/25/2011.
+ if (TEST_SKIP_SPLIT_HANDLING) {
+ LOG.warn("Skipping split message, TEST_SKIP_SPLIT_HANDLING is set");
+ return true; // return true so that the splitting node stays
+ }
+
+ if (et == EventType.RS_ZK_REGION_SPLIT) {
+ regionStates.updateRegionState(p, State.SPLIT);
+ regionOffline(p, State.SPLIT);
+ regionOnline(hri_a, sn);
+ regionOnline(hri_b, sn);
+ }
+ }
+
+ if (et == EventType.RS_ZK_REGION_SPLIT) {
+ LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
+ // Remove region from ZK
+ try {
+ boolean successful = false;
+ while (!successful) {
+ // It's possible that the RS tickles in between the reading of the
+ // znode and the deleting, so it's safe to retry.
+ successful = ZKAssign.deleteNode(
+ watcher, encodedName, EventType.RS_ZK_REGION_SPLIT);
+ }
+ } catch (KeeperException e) {
+ if (e instanceof NoNodeException) {
+ String znodePath = ZKUtil.joinZNode(watcher.splitLogZNode, encodedName);
+ LOG.debug("The znode " + znodePath + " does not exist. May be deleted already.");
+ } else {
+ server.abort("Error deleting SPLIT node " + encodedName, e);
+ }
+ }
+ LOG.info("Handled SPLIT event; parent=" + p.getRegionNameAsString()
+ + ", daughter a=" + hri_a.getRegionNameAsString() + ", daughter b="
+ + hri_b.getRegionNameAsString() + ", on " + sn);
+
+ // User could disable the table before master knows the new region.
+ if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
+ unassign(hri_a);
+ unassign(hri_b);
+ }
+ }
return true;
}
@@ -3438,21 +3513,4 @@ public class AssignmentManager extends ZooKeeperListener {
// remove the region plan as well just in case.
clearRegionPlan(regionInfo);
}
-
- /**
- * Online a newly created region, which is usually from split/merge.
- */
- private void onlineNewRegion(final HRegionInfo region, final ServerName sn) {
- synchronized (regionStates) {
- // Someone could find the region from meta and reassign it.
- if (regionStates.getRegionState(region) == null) {
- regionStates.createRegionState(region);
- regionOnline(region, sn);
- }
- }
- // User could disable the table before master knows the new region.
- if (zkTable.isDisablingOrDisabledTable(region.getTable())) {
- unassign(region);
- }
- }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index c33c46e6e02..2e5c2964acf 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -190,7 +190,15 @@ public class RegionStates {
*/
public synchronized boolean isRegionInState(
final HRegionInfo hri, final State... states) {
- RegionState regionState = getRegionState(hri);
+ return isRegionInState(hri.getEncodedName(), states);
+ }
+
+ /**
+ * @return True if specified region is in one of the specified states.
+ */
+ public synchronized boolean isRegionInState(
+ final String regionName, final State... states) {
+ RegionState regionState = getRegionState(regionName);
State s = regionState != null ? regionState.getState() : null;
for (State state: states) {
if (s == state) return true;
@@ -358,11 +366,11 @@ public class RegionStates {
if (oldState == null) {
LOG.warn("Online region not in RegionStates: " + hri.getShortNameToLog());
} else {
- State state = oldState.getState();
ServerName sn = oldState.getServerName();
- if (state != State.OPEN || sn == null || !sn.equals(serverName)) {
- LOG.debug("Online " + hri.getShortNameToLog() + " with current state=" + state +
- ", expected state=OPEN" + ", assigned to server: " + sn + " expected " + serverName);
+ if (!oldState.isReadyToOnline() || sn == null || !sn.equals(serverName)) {
+ LOG.debug("Online " + hri.getShortNameToLog() + " with current state="
+ + oldState.getState() + ", expected state=OPEN/MERGING_NEW/SPLITTING_NEW"
+ + ", assigned to server: " + sn + " expected " + serverName);
}
}
updateRegionState(hri, State.OPEN, serverName);
@@ -434,29 +442,28 @@ public class RegionStates {
}
/**
- * A region is offline, won't be in transition any more.
- * Its state should be the specified expected state, which
- * can be Split/Merged/Offline/null(=Offline) only.
+ * A region is offline, won't be in transition any more. Its state
+ * should be the specified expected state, which can only be
+ * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
*/
public synchronized void regionOffline(
final HRegionInfo hri, final State expectedState) {
Preconditions.checkArgument(expectedState == null
- || expectedState == State.OFFLINE || expectedState == State.SPLIT
- || expectedState == State.MERGED, "Offlined region should be in state"
- + " OFFLINE/SPLIT/MERGED instead of " + expectedState);
+ || RegionState.isNotUnassignableNotInTransition(expectedState),
+ "Offlined region should be in state OFFLINE/SPLIT/MERGED/"
+ + "SPLITTING_NEW/MERGING_NEW instead of " + expectedState);
String regionName = hri.getEncodedName();
RegionState oldState = regionStates.get(regionName);
if (oldState == null) {
LOG.warn("Offline region not in RegionStates: " + hri.getShortNameToLog());
} else if (LOG.isDebugEnabled()) {
- State state = oldState.getState();
ServerName sn = oldState.getServerName();
- if (state != State.OFFLINE
- && state != State.SPLITTING && state != State.MERGING) {
+ if (!oldState.isReadyToOffline()) {
LOG.debug("Offline " + hri.getShortNameToLog() + " with current state="
- + state + ", expected state=OFFLINE/SPLITTING/MERGING");
+ + oldState.getState() + ", expected state=OFFLINE/SPLIT/"
+ + "MERGED/SPLITTING_NEW/MERGING_NEW");
}
- if (sn != null && state == State.OFFLINE) {
+ if (sn != null && oldState.isOffline()) {
LOG.debug("Offline " + hri.getShortNameToLog()
+ " with current state=OFFLINE, assigned to server: "
+ sn + ", expected null");
@@ -497,9 +504,8 @@ public class RegionStates {
if (isRegionOnline(region)) {
regionsToOffline.add(region);
} else {
- RegionState state = getRegionState(region);
- if (state.isSplitting() || state.isMerging()) {
- LOG.debug("Offline splitting/merging region " + state);
+ if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
+ LOG.debug("Offline splitting/merging region " + getRegionState(region));
try {
// Delete the ZNode if exists
ZKAssign.deleteNodeFailSilent(watcher, region);
@@ -512,6 +518,7 @@ public class RegionStates {
}
for (HRegionInfo hri : regionsToOffline) {
+ updateRegionState(hri, State.OFFLINE);
regionOffline(hri);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MergedRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MergedRegionHandler.java
deleted file mode 100644
index dc1aeb30e91..00000000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MergedRegionHandler.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.zookeeper.ZKAssign;
-import org.apache.hadoop.hbase.zookeeper.ZKUtil;
-import org.apache.zookeeper.KeeperException;
-import org.apache.zookeeper.KeeperException.NoNodeException;
-
-/**
- * Handles MERGED regions event on Master, master receive the merge report from
- * the regionserver, then offline the merging regions and online the merged
- * region.Here region_a sorts before region_b.
- */
-@InterfaceAudience.Private
-public class MergedRegionHandler extends EventHandler implements
- TotesHRegionInfo {
- private static final Log LOG = LogFactory.getLog(MergedRegionHandler.class);
- private final AssignmentManager assignmentManager;
- private final HRegionInfo merged;
- private final HRegionInfo region_a;
- private final HRegionInfo region_b;
- private final ServerName sn;
-
- public MergedRegionHandler(Server server,
- AssignmentManager assignmentManager, ServerName sn,
- final List mergeRegions) {
- super(server, EventType.RS_ZK_REGION_MERGED);
- assert mergeRegions.size() == 3;
- this.assignmentManager = assignmentManager;
- this.merged = mergeRegions.get(0);
- this.region_a = mergeRegions.get(1);
- this.region_b = mergeRegions.get(2);
- this.sn = sn;
- }
-
- @Override
- public HRegionInfo getHRegionInfo() {
- return this.merged;
- }
-
- @Override
- public String toString() {
- String name = "UnknownServerName";
- if (server != null && server.getServerName() != null) {
- name = server.getServerName().toString();
- }
- String mergedRegion = "UnknownRegion";
- if (merged != null) {
- mergedRegion = merged.getRegionNameAsString();
- }
- return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-"
- + mergedRegion;
- }
-
- @Override
- public void process() {
- String encodedRegionName = this.merged.getEncodedName();
- LOG.debug("Handling MERGE event for " + encodedRegionName
- + "; deleting node");
-
- this.assignmentManager.handleRegionsMergeReport(this.sn, this.merged,
- this.region_a, this.region_b);
- // Remove region from ZK
- try {
-
- boolean successful = false;
- while (!successful) {
- // It's possible that the RS tickles in between the reading of the
- // znode and the deleting, so it's safe to retry.
- successful = ZKAssign.deleteNode(this.server.getZooKeeper(),
- encodedRegionName, EventType.RS_ZK_REGION_MERGED);
- }
- } catch (KeeperException e) {
- if (e instanceof NoNodeException) {
- String znodePath = ZKUtil.joinZNode(
- this.server.getZooKeeper().splitLogZNode, encodedRegionName);
- LOG.debug("The znode " + znodePath
- + " does not exist. May be deleted already.");
- } else {
- server.abort("Error deleting MERGED node in ZK for transition ZK node ("
- + merged.getEncodedName() + ")", e);
- }
- }
- LOG.info("Handled MERGED event; merged="
- + this.merged.getRegionNameAsString() + " region_a="
- + this.region_a.getRegionNameAsString() + "region_b="
- + this.region_b.getRegionNameAsString());
- }
-}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
index f3f48106c05..59ea53d1acb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
@@ -246,10 +247,14 @@ public class ServerShutdownHandler extends EventHandler {
//clean zk node
LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists");
ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri);
+ regionStates.updateRegionState(hri, State.OFFLINE);
} catch (KeeperException ke) {
this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
return;
}
+ } else if (regionStates.isRegionInState(
+ hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
+ regionStates.regionOffline(hri);
}
toAssignRegions.add(hri);
} else if (rit != null) {
@@ -260,8 +265,9 @@ public class ServerShutdownHandler extends EventHandler {
// The rit that we use may be stale in case the table was in DISABLING state
// but though we did assign we will not be clearing the znode in CLOSING state.
// Doing this will have no harm. See HBASE-5927
+ regionStates.updateRegionState(hri, State.OFFLINE);
am.deleteClosingOrClosedNode(hri);
- am.regionOffline(hri);
+ am.offlineDisabledRegion(hri);
} else {
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
+ rit + " not to be assigned by SSH of server " + serverName);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java
deleted file mode 100644
index 8d04e10aa94..00000000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.zookeeper.ZKAssign;
-import org.apache.hadoop.hbase.zookeeper.ZKUtil;
-import org.apache.zookeeper.KeeperException;
-import org.apache.zookeeper.KeeperException.NoNodeException;
-
-/**
- * Handles SPLIT region event on Master.
- */
-@InterfaceAudience.Private
-public class SplitRegionHandler extends EventHandler implements TotesHRegionInfo {
- private static final Log LOG = LogFactory.getLog(SplitRegionHandler.class);
- private final AssignmentManager assignmentManager;
- private final HRegionInfo parent;
- private final ServerName sn;
- private final List daughters;
- /**
- * For testing only! Set to true to skip handling of split.
- */
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MS_SHOULD_BE_FINAL")
- public static boolean TEST_SKIP = false;
-
- public SplitRegionHandler(Server server,
- AssignmentManager assignmentManager, HRegionInfo regionInfo,
- ServerName sn, final List daughters) {
- super(server, EventType.RS_ZK_REGION_SPLIT);
- this.assignmentManager = assignmentManager;
- this.parent = regionInfo;
- this.sn = sn;
- this.daughters = daughters;
- }
-
- @Override
- public HRegionInfo getHRegionInfo() {
- return this.parent;
- }
-
- @Override
- public String toString() {
- String name = "UnknownServerName";
- if(server != null && server.getServerName() != null) {
- name = server.getServerName().toString();
- }
- String parentRegion = "UnknownRegion";
- if(parent != null) {
- parentRegion = parent.getRegionNameAsString();
- }
- return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + parentRegion;
- }
-
- @Override
- public void process() {
- String encodedRegionName = this.parent.getEncodedName();
- LOG.debug("Handling SPLIT event for " + encodedRegionName +
- "; deleting node");
- // The below is for testing ONLY! We can't do fault injection easily, so
- // resort to this kinda uglyness -- St.Ack 02/25/2011.
- if (TEST_SKIP) {
- LOG.warn("Skipping split message, TEST_SKIP is set");
- return;
- }
- this.assignmentManager.handleSplitReport(this.sn, this.parent,
- this.daughters.get(0), this.daughters.get(1));
- // Remove region from ZK
- try {
-
- boolean successful = false;
- while (!successful) {
- // It's possible that the RS tickles in between the reading of the
- // znode and the deleting, so it's safe to retry.
- successful = ZKAssign.deleteNode(this.server.getZooKeeper(),
- encodedRegionName,
- EventType.RS_ZK_REGION_SPLIT);
- }
- } catch (KeeperException e) {
- if (e instanceof NoNodeException) {
- String znodePath = ZKUtil.joinZNode(
- this.server.getZooKeeper().splitLogZNode, encodedRegionName);
- LOG.debug("The znode " + znodePath
- + " does not exist. May be deleted already.");
- } else {
- server.abort("Error deleting SPLIT node in ZK for transition ZK node (" +
- parent.getEncodedName() + ")", e);
- }
- }
- LOG.info("Handled SPLIT event; parent=" +
- this.parent.getRegionNameAsString() +
- " daughter a=" + this.daughters.get(0).getRegionNameAsString() +
- "daughter b=" + this.daughters.get(1).getRegionNameAsString());
- }
-}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 05a7b01bfd7..526e472e176 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -66,7 +66,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.DroppedSnapshotException;
@@ -114,12 +113,9 @@ import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
-import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WALEntry;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
-import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
-import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALKey;
import org.apache.hadoop.hbase.regionserver.MultiVersionConsistencyControl.WriteEntry;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
@@ -2781,6 +2777,11 @@ public class HRegion implements HeapSize { // , Writable{
FileSystem fs = this.fs.getFileSystem();
NavigableSet files = HLogUtil.getSplitEditFilesSorted(fs, regiondir);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Found " + (files == null ? 0 : files.size())
+ + " recovered edits file(s) under " + regiondir);
+ }
+
if (files == null || files.isEmpty()) return seqid;
for (Path edits: files) {
@@ -2794,10 +2795,12 @@ public class HRegion implements HeapSize { // , Writable{
String fileName = edits.getName();
maxSeqId = Math.abs(Long.parseLong(fileName));
if (maxSeqId <= minSeqIdForTheRegion) {
- String msg = "Maximum sequenceid for this log is " + maxSeqId
+ if (LOG.isDebugEnabled()) {
+ String msg = "Maximum sequenceid for this log is " + maxSeqId
+ " and minimum sequenceid for the region is " + minSeqIdForTheRegion
+ ", skipped the whole file, path=" + edits;
- LOG.debug(msg);
+ LOG.debug(msg);
+ }
continue;
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java
index 457f85070fb..a525cf14eca 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java
@@ -18,6 +18,10 @@
*/
package org.apache.hadoop.hbase.regionserver;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGED;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGING;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_MERGE;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -45,6 +49,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.apache.zookeeper.data.Stat;
/**
* Executes region merge as a "transaction". It is similar with
@@ -261,25 +266,16 @@ public class RegionMergeTransaction {
createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
} catch (KeeperException e) {
- throw new IOException("Failed creating MERGING znode on "
+ throw new IOException("Failed creating PENDING_MERGE znode on "
+ this.mergedRegionInfo.getRegionNameAsString(), e);
}
}
this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
if (server != null && server.getZooKeeper() != null) {
- try {
- // Transition node from MERGING to MERGING after creating the merge
- // node. Master will get the callback for node change only if the
- // transition is successful.
- // Note that if the transition fails then the rollback will delete the
- // created znode as the journal entry SET_MERGING_IN_ZK is added.
- this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
- this.mergedRegionInfo, server.getServerName(), -1,
- region_a.getRegionInfo(), region_b.getRegionInfo());
- } catch (KeeperException e) {
- throw new IOException("Failed setting MERGING znode on "
- + this.mergedRegionInfo.getRegionNameAsString(), e);
- }
+ // After creating the merge node, wait for master to transition it
+ // from PENDING_MERGE to MERGING so that we can move on. We want master
+ // knows about it and won't transition any region which is merging.
+ znodeVersion = getZKNode(server, services);
}
this.region_a.getRegionFileSystem().createMergesDir();
@@ -303,9 +299,10 @@ public class RegionMergeTransaction {
try {
// Do one more check on the merging znode (before it is too late) in case
// any merging region is moved somehow. If so, the znode transition will fail.
- this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
- this.mergedRegionInfo, server.getServerName(), this.znodeVersion,
- region_a.getRegionInfo(), region_b.getRegionInfo());
+ this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
+ this.mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo(),
+ server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGING);
} catch (KeeperException e) {
throw new IOException("Failed setting MERGING znode on "
+ this.mergedRegionInfo.getRegionNameAsString(), e);
@@ -489,9 +486,10 @@ public class RegionMergeTransaction {
// Tell master about merge by updating zk. If we fail, abort.
try {
- this.znodeVersion = transitionNodeMerge(server.getZooKeeper(),
- this.mergedRegionInfo, region_a.getRegionInfo(),
- region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
+ this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
+ this.mergedRegionInfo, region_a.getRegionInfo(),
+ region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGED);
long startTime = EnvironmentEdgeManager.currentTimeMillis();
int spins = 0;
@@ -506,9 +504,10 @@ public class RegionMergeTransaction {
}
Thread.sleep(100);
// When this returns -1 it means the znode doesn't exist
- this.znodeVersion = tickleNodeMerge(server.getZooKeeper(),
- this.mergedRegionInfo, region_a.getRegionInfo(),
- region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
+ this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
+ this.mergedRegionInfo, region_a.getRegionInfo(),
+ region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_MERGED, RS_ZK_REGION_MERGED);
spins++;
} while (this.znodeVersion != -1 && !server.isStopped()
&& !services.isStopping());
@@ -520,12 +519,83 @@ public class RegionMergeTransaction {
+ mergedRegionInfo.getEncodedName(), e);
}
-
// Leaving here, the mergedir with its dross will be in place but since the
// merge was successful, just leave it; it'll be cleaned when region_a is
// cleaned up by CatalogJanitor on master
}
+ /**
+ * Wait for the merging node to be transitioned from pending_merge
+ * to merging by master. That's how we are sure master has processed
+ * the event and is good with us to move on. If we don't get any update,
+ * we periodically transition the node so that master gets the callback.
+ * If the node is removed or is not in pending_merge state any more,
+ * we abort the merge.
+ */
+ private int getZKNode(final Server server,
+ final RegionServerServices services) throws IOException {
+ // Wait for the master to process the pending_merge.
+ try {
+ int spins = 0;
+ Stat stat = new Stat();
+ ZooKeeperWatcher zkw = server.getZooKeeper();
+ ServerName expectedServer = server.getServerName();
+ String node = mergedRegionInfo.getEncodedName();
+ while (!(server.isStopped() || services.isStopping())) {
+ if (spins % 5 == 0) {
+ LOG.debug("Still waiting for master to process "
+ + "the pending_merge for " + node);
+ transitionMergingNode(zkw, mergedRegionInfo, region_a.getRegionInfo(),
+ region_b.getRegionInfo(), expectedServer, -1, RS_ZK_REQUEST_REGION_MERGE,
+ RS_ZK_REQUEST_REGION_MERGE);
+ }
+ Thread.sleep(100);
+ spins++;
+ byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
+ if (data == null) {
+ throw new IOException("Data is null, merging node "
+ + node + " no longer exists");
+ }
+ RegionTransition rt = RegionTransition.parseFrom(data);
+ EventType et = rt.getEventType();
+ if (et == RS_ZK_REGION_MERGING) {
+ ServerName serverName = rt.getServerName();
+ if (!serverName.equals(expectedServer)) {
+ throw new IOException("Merging node " + node + " is for "
+ + serverName + ", not us " + expectedServer);
+ }
+ byte [] payloadOfMerging = rt.getPayload();
+ List mergingRegions = HRegionInfo.parseDelimitedFrom(
+ payloadOfMerging, 0, payloadOfMerging.length);
+ assert mergingRegions.size() == 3;
+ HRegionInfo a = mergingRegions.get(1);
+ HRegionInfo b = mergingRegions.get(2);
+ HRegionInfo hri_a = region_a.getRegionInfo();
+ HRegionInfo hri_b = region_b.getRegionInfo();
+ if (!(hri_a.equals(a) && hri_b.equals(b))) {
+ throw new IOException("Merging node " + node + " is for " + a + ", "
+ + b + ", not expected regions: " + hri_a + ", " + hri_b);
+ }
+ // Master has processed it.
+ return stat.getVersion();
+ }
+ if (et != RS_ZK_REQUEST_REGION_MERGE) {
+ throw new IOException("Merging node " + node
+ + " moved out of merging to " + et);
+ }
+ }
+ // Server is stopping/stopped
+ throw new IOException("Server is "
+ + (services.isStopping() ? "stopping" : "stopped"));
+ } catch (Exception e) {
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
+ throw new IOException("Failed getting MERGING znode on "
+ + mergedRegionInfo.getRegionNameAsString(), e);
+ }
+ }
+
/**
* Create reference file(s) of merging regions under the region_a merges dir
* @param hstoreFilesOfRegionA
@@ -566,6 +636,7 @@ public class RegionMergeTransaction {
* of no return and so now need to abort the server to minimize
* damage.
*/
+ @SuppressWarnings("deprecation")
public boolean rollback(final Server server,
final RegionServerServices services) throws IOException {
assert this.mergedRegionInfo != null;
@@ -653,20 +724,22 @@ public class RegionMergeTransaction {
private static void cleanZK(final Server server, final HRegionInfo hri) {
try {
// Only delete if its in expected state; could have been hijacked.
- ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
- EventType.RS_ZK_REGION_MERGING);
+ if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
+ RS_ZK_REQUEST_REGION_MERGE)) {
+ ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
+ RS_ZK_REGION_MERGING);
+ }
} catch (KeeperException.NoNodeException e) {
LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
} catch (KeeperException e) {
server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
}
-
}
/**
- * Creates a new ephemeral node in the MERGING state for the merged region.
+ * Creates a new ephemeral node in the PENDING_MERGE state for the merged region.
* Create it ephemeral in case regionserver dies mid-merge.
- *
+ *
*
* Does not transition nodes from other states. If a node already exists for
* this region, a {@link NodeExistsException} will be thrown.
@@ -674,32 +747,27 @@ public class RegionMergeTransaction {
* @param zkw zk reference
* @param region region to be created as offline
* @param serverName server event originates from
- * @return Version of znode created.
* @throws KeeperException
* @throws IOException
*/
- int createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
+ public static void createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
final ServerName serverName, final HRegionInfo a,
final HRegionInfo b) throws KeeperException, IOException {
LOG.debug(zkw.prefix("Creating ephemeral node for "
- + region.getEncodedName() + " in MERGING state"));
- byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
+ + region.getEncodedName() + " in PENDING_MERGE state"));
+ byte [] payload = HRegionInfo.toDelimitedByteArray(region, a, b);
RegionTransition rt = RegionTransition.createRegionTransition(
- EventType.RS_ZK_REGION_MERGING, region.getRegionName(), serverName, payload);
+ RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, payload);
String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
throw new IOException("Failed create of ephemeral " + node);
}
- // Transition node from MERGING to MERGING and pick up version so we
- // can be sure this znode is ours; version is needed deleting.
- return transitionNodeMerging(zkw, region, serverName, -1, a, b);
}
/**
- * Transitions an existing node for the specified region which is currently in
- * the MERGING state to be in the MERGE state. Converts the ephemeral MERGING
- * znode to an ephemeral MERGE node. Master cleans up MERGE znode when it
- * reads it (or if we crash, zk will clean it up).
+ * Transitions an existing ephemeral node for the specified region which is
+ * currently in the begin state to be in the end state. Master cleans up the
+ * final MERGE znode when it reads it (or if we crash, zk will clean it up).
*
*
* Does not transition nodes from other states. If for some reason the node
@@ -710,19 +778,18 @@ public class RegionMergeTransaction {
* This method can fail and return false for three different reasons:
*
* - Node for this region does not exist
- * - Node for this region is not in MERGING state
- * - After verifying MERGING state, update fails because of wrong version
+ *
- Node for this region is not in the begin state
+ * - After verifying the begin state, update fails because of wrong version
* (this should never actually happen since an RS only does this transition
- * following a transition to MERGING. if two RS are conflicting, one would
- * fail the original transition to MERGING and not this transition)
+ * following a transition to the begin state. If two RS are conflicting, one would
+ * fail the original transition to the begin state and not this transition)
*
*
*
* Does not set any watches.
*
*
- * This method should only be used by a RegionServer when completing the open
- * of merged region.
+ * This method should only be used by a RegionServer when merging two regions.
*
* @param zkw zk reference
* @param merged region to be transitioned to opened
@@ -730,45 +797,19 @@ public class RegionMergeTransaction {
* @param b merging region B
* @param serverName server event originates from
* @param znodeVersion expected version of data before modification
+ * @param beginState the expected current state the znode should be
+ * @param endState the state to be transition to
* @return version of node after transition, -1 if unsuccessful transition
* @throws KeeperException if unexpected zookeeper exception
* @throws IOException
*/
- private static int transitionNodeMerge(ZooKeeperWatcher zkw,
+ public static int transitionMergingNode(ZooKeeperWatcher zkw,
HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
- final int znodeVersion) throws KeeperException, IOException {
+ final int znodeVersion, final EventType beginState,
+ final EventType endState) throws KeeperException, IOException {
byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
return ZKAssign.transitionNode(zkw, merged, serverName,
- EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGED,
- znodeVersion, payload);
- }
-
- /**
- *
- * @param zkw zk reference
- * @param parent region to be transitioned to merging
- * @param serverName server event originates from
- * @param version znode version
- * @return version of node after transition, -1 if unsuccessful transition
- * @throws KeeperException
- * @throws IOException
- */
- int transitionNodeMerging(final ZooKeeperWatcher zkw,
- final HRegionInfo parent, final ServerName serverName, final int version,
- final HRegionInfo a, final HRegionInfo b) throws KeeperException, IOException {
- byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
- return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGING,
- version, payload);
- }
-
- private static int tickleNodeMerge(ZooKeeperWatcher zkw, HRegionInfo merged,
- HRegionInfo a, HRegionInfo b, ServerName serverName,
- final int znodeVersion) throws KeeperException, IOException {
- byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
- return ZKAssign.transitionNode(zkw, merged, serverName,
- EventType.RS_ZK_REGION_MERGED, EventType.RS_ZK_REGION_MERGED,
- znodeVersion, payload);
+ beginState, endState, znodeVersion, payload);
}
/**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
index e8d2ecb4e8a..b728efffb47 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
@@ -18,6 +18,10 @@
*/
package org.apache.hadoop.hbase.regionserver;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_SPLIT;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLIT;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLITTING;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -54,6 +58,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.apache.zookeeper.data.Stat;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -296,27 +301,18 @@ public class SplitTransaction {
if (server != null && server.getZooKeeper() != null) {
try {
createNodeSplitting(server.getZooKeeper(),
- this.parent.getRegionInfo(), server.getServerName());
+ parent.getRegionInfo(), server.getServerName(), hri_a, hri_b);
} catch (KeeperException e) {
- throw new IOException("Failed creating SPLITTING znode on " +
+ throw new IOException("Failed creating PENDING_SPLIT znode on " +
this.parent.getRegionNameAsString(), e);
}
}
this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
if (server != null && server.getZooKeeper() != null) {
- try {
- // Transition node from SPLITTING to SPLITTING after creating the split node.
- // Master will get the callback for node change only if the transition is successful.
- // Note that if the transition fails then the rollback will delete the created znode
- // as the journal entry SET_SPLITTING_IN_ZK is added.
- // TODO : May be we can add some new state to znode and handle the new state incase
- // of success/failure
- this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
- this.parent.getRegionInfo(), server.getServerName(), -1);
- } catch (KeeperException e) {
- throw new IOException("Failed setting SPLITTING znode on "
- + this.parent.getRegionNameAsString(), e);
- }
+ // After creating the split node, wait for master to transition it
+ // from PENDING_SPLIT to SPLITTING so that we can move on. We want master
+ // knows about it and won't transition any region which is splitting.
+ znodeVersion = getZKNode(server, services);
}
this.parent.getRegionFileSystem().createSplitsDir();
@@ -444,9 +440,10 @@ public class SplitTransaction {
// Tell master about split by updating zk. If we fail, abort.
if (server != null && server.getZooKeeper() != null) {
try {
- this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
+ this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
- server.getServerName(), this.znodeVersion);
+ server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_SPLITTING, RS_ZK_REGION_SPLIT);
int spins = 0;
// Now wait for the master to process the split. We know it's done
@@ -459,9 +456,10 @@ public class SplitTransaction {
}
Thread.sleep(100);
// When this returns -1 it means the znode doesn't exist
- this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
+ this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
- server.getServerName(), this.znodeVersion);
+ server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_SPLIT, RS_ZK_REGION_SPLIT);
spins++;
} while (this.znodeVersion != -1 && !server.isStopped()
&& !services.isStopping());
@@ -483,6 +481,76 @@ public class SplitTransaction {
// deleted and cleaned up.
}
+ /**
+ * Wait for the splitting node to be transitioned from pending_split
+ * to splitting by master. That's how we are sure master has processed
+ * the event and is good with us to move on. If we don't get any update,
+ * we periodically transition the node so that master gets the callback.
+ * If the node is removed or is not in pending_split state any more,
+ * we abort the split.
+ */
+ private int getZKNode(final Server server,
+ final RegionServerServices services) throws IOException {
+ // Wait for the master to process the pending_split.
+ try {
+ int spins = 0;
+ Stat stat = new Stat();
+ ZooKeeperWatcher zkw = server.getZooKeeper();
+ ServerName expectedServer = server.getServerName();
+ String node = parent.getRegionInfo().getEncodedName();
+ while (!(server.isStopped() || services.isStopping())) {
+ if (spins % 5 == 0) {
+ LOG.debug("Still waiting for master to process "
+ + "the pending_split for " + node);
+ transitionSplittingNode(zkw, parent.getRegionInfo(),
+ hri_a, hri_b, expectedServer, -1, RS_ZK_REQUEST_REGION_SPLIT,
+ RS_ZK_REQUEST_REGION_SPLIT);
+ }
+ Thread.sleep(100);
+ spins++;
+ byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
+ if (data == null) {
+ throw new IOException("Data is null, splitting node "
+ + node + " no longer exists");
+ }
+ RegionTransition rt = RegionTransition.parseFrom(data);
+ EventType et = rt.getEventType();
+ if (et == RS_ZK_REGION_SPLITTING) {
+ ServerName serverName = rt.getServerName();
+ if (!serverName.equals(expectedServer)) {
+ throw new IOException("Splitting node " + node + " is for "
+ + serverName + ", not us " + expectedServer);
+ }
+ byte [] payloadOfSplitting = rt.getPayload();
+ List splittingRegions = HRegionInfo.parseDelimitedFrom(
+ payloadOfSplitting, 0, payloadOfSplitting.length);
+ assert splittingRegions.size() == 2;
+ HRegionInfo a = splittingRegions.get(0);
+ HRegionInfo b = splittingRegions.get(1);
+ if (!(hri_a.equals(a) && hri_b.equals(b))) {
+ throw new IOException("Splitting node " + node + " is for " + a + ", "
+ + b + ", not expected daughters: " + hri_a + ", " + hri_b);
+ }
+ // Master has processed it.
+ return stat.getVersion();
+ }
+ if (et != RS_ZK_REQUEST_REGION_SPLIT) {
+ throw new IOException("Splitting node " + node
+ + " moved out of splitting to " + et);
+ }
+ }
+ // Server is stopping/stopped
+ throw new IOException("Server is "
+ + (services.isStopping() ? "stopping" : "stopped"));
+ } catch (Exception e) {
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
+ throw new IOException("Failed getting SPLITTING znode on "
+ + parent.getRegionNameAsString(), e);
+ }
+ }
+
/**
* Run the transaction.
* @param server Hosting server instance. Can be null when testing (won't try
@@ -719,6 +787,7 @@ public class SplitTransaction {
* @return True if we successfully rolled back, false if we got to the point
* of no return and so now need to abort the server to minimize damage.
*/
+ @SuppressWarnings("deprecation")
public boolean rollback(final Server server, final RegionServerServices services)
throws IOException {
// Coprocessor callback
@@ -801,15 +870,20 @@ public class SplitTransaction {
private static void cleanZK(final Server server, final HRegionInfo hri) {
try {
// Only delete if its in expected state; could have been hijacked.
- ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
- EventType.RS_ZK_REGION_SPLITTING);
+ if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
+ RS_ZK_REQUEST_REGION_SPLIT)) {
+ ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
+ RS_ZK_REGION_SPLITTING);
+ }
+ } catch (KeeperException.NoNodeException e) {
+ LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
} catch (KeeperException e) {
server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
}
}
/**
- * Creates a new ephemeral node in the SPLITTING state for the specified region.
+ * Creates a new ephemeral node in the PENDING_SPLIT state for the specified region.
* Create it ephemeral in case regionserver dies mid-split.
*
* Does not transition nodes from other states. If a node already exists
@@ -818,91 +892,63 @@ public class SplitTransaction {
* @param zkw zk reference
* @param region region to be created as offline
* @param serverName server event originates from
- * @return Version of znode created.
* @throws KeeperException
* @throws IOException
*/
- int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
- final ServerName serverName) throws KeeperException, IOException {
+ public static void createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
+ final ServerName serverName, final HRegionInfo a,
+ final HRegionInfo b) throws KeeperException, IOException {
LOG.debug(zkw.prefix("Creating ephemeral node for " +
- region.getEncodedName() + " in SPLITTING state"));
- RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
- region.getRegionName(), serverName);
+ region.getEncodedName() + " in PENDING_SPLIT state"));
+ byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
+ RegionTransition rt = RegionTransition.createRegionTransition(
+ RS_ZK_REQUEST_REGION_SPLIT, region.getRegionName(), serverName, payload);
String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
throw new IOException("Failed create of ephemeral " + node);
}
- // Transition node from SPLITTING to SPLITTING and pick up version so we
- // can be sure this znode is ours; version is needed deleting.
- return transitionNodeSplitting(zkw, region, serverName, -1);
}
/**
- * Transitions an existing node for the specified region which is
- * currently in the SPLITTING state to be in the SPLIT state. Converts the
- * ephemeral SPLITTING znode to an ephemeral SPLIT node. Master cleans up
- * SPLIT znode when it reads it (or if we crash, zk will clean it up).
+ * Transitions an existing ephemeral node for the specified region which is
+ * currently in the begin state to be in the end state. Master cleans up the
+ * final SPLIT znode when it reads it (or if we crash, zk will clean it up).
*
- *
Does not transition nodes from other states. If for some reason the
- * node could not be transitioned, the method returns -1. If the transition
+ *
Does not transition nodes from other states. If for some reason the
+ * node could not be transitioned, the method returns -1. If the transition
* is successful, the version of the node after transition is returned.
*
*
This method can fail and return false for three different reasons:
*
- Node for this region does not exist
- * - Node for this region is not in SPLITTING state
- * - After verifying SPLITTING state, update fails because of wrong version
+ *
- Node for this region is not in the begin state
+ * - After verifying the begin state, update fails because of wrong version
* (this should never actually happen since an RS only does this transition
- * following a transition to SPLITTING. if two RS are conflicting, one would
- * fail the original transition to SPLITTING and not this transition)
+ * following a transition to the begin state. If two RS are conflicting, one would
+ * fail the original transition to the begin state and not this transition)
*
*
* Does not set any watches.
*
- *
This method should only be used by a RegionServer when completing the
- * open of a region.
+ *
This method should only be used by a RegionServer when splitting a region.
*
* @param zkw zk reference
* @param parent region to be transitioned to opened
* @param a Daughter a of split
* @param b Daughter b of split
* @param serverName server event originates from
+ * @param znodeVersion expected version of data before modification
+ * @param beginState the expected current state the znode should be
+ * @param endState the state to be transition to
* @return version of node after transition, -1 if unsuccessful transition
* @throws KeeperException if unexpected zookeeper exception
* @throws IOException
*/
- private static int transitionNodeSplit(ZooKeeperWatcher zkw,
+ public static int transitionSplittingNode(ZooKeeperWatcher zkw,
HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
- final int znodeVersion)
- throws KeeperException, IOException {
+ final int znodeVersion, final EventType beginState,
+ final EventType endState) throws KeeperException, IOException {
byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLIT,
- znodeVersion, payload);
- }
-
- /**
- *
- * @param zkw zk reference
- * @param parent region to be transitioned to splitting
- * @param serverName server event originates from
- * @param version znode version
- * @return version of node after transition, -1 if unsuccessful transition
- * @throws KeeperException
- * @throws IOException
- */
- int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent,
- final ServerName serverName, final int version) throws KeeperException, IOException {
- return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
- }
-
- private static int tickleNodeSplit(ZooKeeperWatcher zkw,
- HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
- final int znodeVersion)
- throws KeeperException, IOException {
- byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
- return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT,
- znodeVersion, payload);
+ beginState, endState, znodeVersion, payload);
}
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
index 0bc3d4eef42..fa5f7a4552d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
@@ -84,7 +84,8 @@ public class TestMaster {
LOG.info("Splitting table");
TEST_UTIL.getHBaseAdmin().split(TABLENAME.getName());
LOG.info("Waiting for split result to be about to open");
- while (!m.assignmentManager.wasSplitHandlerCalled()) {
+ RegionStates regionStates = m.assignmentManager.getRegionStates();
+ while (regionStates.getRegionsOfTable(TABLENAME).size() <= 1) {
Thread.sleep(100);
}
LOG.info("Making sure we can call getTableRegions while opening");
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index e96d4fd301d..3a833791219 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -37,7 +37,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -47,14 +46,17 @@ import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.executor.EventType;
+import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
+import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
@@ -148,7 +150,7 @@ public class TestMasterFailover {
*
* @throws Exception
*/
- @Test (timeout=180000)
+ @Test (timeout=240000)
public void testMasterFailoverWithMockedRIT() throws Exception {
final int NUM_MASTERS = 1;
@@ -214,10 +216,30 @@ public class TestMasterFailover {
List disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
+ TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
+ TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
+
log("Regions in hbase:meta and namespace have been created");
- // at this point we only expect 3 regions to be assigned out (catalogs and namespace)
- assertEquals(2, cluster.countServedRegions());
+ // at this point we only expect 4 regions to be assigned out
+ // (catalogs and namespace, + 2 merging regions)
+ assertEquals(4, cluster.countServedRegions());
+
+ // Move merging regions to the same region server
+ AssignmentManager am = master.getAssignmentManager();
+ RegionStates regionStates = am.getRegionStates();
+ List mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
+ assertEquals(2, mergingRegions.size());
+ HRegionInfo a = mergingRegions.get(0);
+ HRegionInfo b = mergingRegions.get(1);
+ HRegionInfo newRegion = RegionMergeTransaction.getMergedRegionInfo(a, b);
+ ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
+ ServerName serverB = regionStates.getRegionServerOfRegion(b);
+ if (!serverB.equals(mergingServer)) {
+ RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
+ am.balance(plan);
+ assertTrue(am.waitForAssignment(b));
+ }
// Let's just assign everything to first RS
HRegionServer hrs = cluster.getRegionServer(0);
@@ -339,6 +361,15 @@ public class TestMasterFailover {
Thread.sleep(100);
}
+ /*
+ * ZK = MERGING
+ */
+
+ // Regions of table of merging regions
+ // Cause: Master was down while merging was going on
+ RegionMergeTransaction.createNodeMerging(
+ zkw, newRegion, mergingServer, a, b);
+
/*
* ZK = NONE
*/
@@ -356,6 +387,16 @@ public class TestMasterFailover {
cluster.waitForActiveAndReadyMaster();
log("Master is ready");
+ // Get new region states since master restarted
+ regionStates = master.getAssignmentManager().getRegionStates();
+ // Merging region should remain merging
+ assertTrue(regionStates.isRegionInState(a, State.MERGING));
+ assertTrue(regionStates.isRegionInState(b, State.MERGING));
+ assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
+ // Now remove the faked merging znode, merging regions should be
+ // offlined automatically, otherwise it is a bug in AM.
+ ZKAssign.deleteNodeFailSilent(zkw, newRegion);
+
// Failover should be completed, now wait for no RIT
log("Waiting for no more RIT");
ZKAssign.blockUntilNoRIT(zkw);
@@ -375,6 +416,9 @@ public class TestMasterFailover {
// Everything that should be offline should not be online
for (HRegionInfo hri : regionsThatShouldBeOffline) {
+ if (onlineRegions.contains(hri)) {
+ LOG.debug(hri);
+ }
assertFalse(onlineRegions.contains(hri));
}
@@ -384,7 +428,6 @@ public class TestMasterFailover {
TEST_UTIL.shutdownMiniCluster();
}
-
/**
* Complex test of master failover that tests as many permutations of the
* different possible states that regions in transition could be in within ZK
@@ -794,7 +837,8 @@ public class TestMasterFailover {
long maxTime = 120000;
boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
if (!done) {
- LOG.info("rit=" + master.getAssignmentManager().getRegionStates().getRegionsInTransition());
+ RegionStates regionStates = master.getAssignmentManager().getRegionStates();
+ LOG.info("rit=" + regionStates.getRegionsInTransition());
}
long elapsed = System.currentTimeMillis() - now;
assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index c856206add1..317d3500caa 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -74,7 +74,6 @@ import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.RegionState.State;
-import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -262,8 +261,6 @@ public class TestSplitTransactionOnCluster {
HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
final List regions = cluster.getRegions(tableName);
final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
- int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
- final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
insertData(tableName.getName(), admin, t);
t.close();
@@ -349,7 +346,7 @@ public class TestSplitTransactionOnCluster {
int regionCount = ProtobufUtil.getOnlineRegions(server).size();
// Now, before we split, set special flag in master, a flag that has
// it FAIL the processing of split.
- SplitRegionHandler.TEST_SKIP = true;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
// Now try splitting and it should work.
split(hri, server, regionCount);
// Get daughters
@@ -357,15 +354,18 @@ public class TestSplitTransactionOnCluster {
// Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
hri.getEncodedName());
- Stat stats =
- TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
- LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
- RegionTransition rt =
- RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
+ RegionTransition rt = null;
+ Stat stats = null;
+ // Wait till the znode moved to SPLIT
+ for (int i=0; i<100; i++) {
+ stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
+ rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
hri.getEncodedName()));
- // State could be SPLIT or SPLITTING.
- assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) ||
- rt.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
+ if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
+ Thread.sleep(100);
+ }
+ LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
+ assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
// Now crash the server
cluster.abortRegionServer(tableRegionIndex);
waitUntilRegionServerDead();
@@ -387,7 +387,7 @@ public class TestSplitTransactionOnCluster {
assertTrue(stats == null);
} finally {
// Set this flag back.
- SplitRegionHandler.TEST_SKIP = false;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
@@ -645,7 +645,7 @@ public class TestSplitTransactionOnCluster {
printOutRegions(server, "Initial regions: ");
// Now, before we split, set special flag in master, a flag that has
// it FAIL the processing of split.
- SplitRegionHandler.TEST_SKIP = true;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
// Now try splitting and it should work.
this.admin.split(hri.getRegionNameAsString());
@@ -675,7 +675,7 @@ public class TestSplitTransactionOnCluster {
assertTrue(regionServerOfRegion != null);
// Remove the block so that split can move ahead.
- SplitRegionHandler.TEST_SKIP = false;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
Stat stat = new Stat();
byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
@@ -692,7 +692,7 @@ public class TestSplitTransactionOnCluster {
assertTrue(regionServerOfRegion == null);
} finally {
// Set this flag back.
- SplitRegionHandler.TEST_SKIP = false;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
@@ -765,8 +765,6 @@ public class TestSplitTransactionOnCluster {
ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
assertTrue(regionServerOfRegion == null);
} finally {
- // Set this flag back.
- SplitRegionHandler.TEST_SKIP = false;
this.admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
@@ -998,8 +996,8 @@ public class TestSplitTransactionOnCluster {
assertTrue("not able to find a splittable region", region != null);
SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
@Override
- int createNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo region,
- ServerName serverName) throws KeeperException, IOException {
+ public PairOfSameType stepsBeforePONR(final Server server,
+ final RegionServerServices services, boolean testing) throws IOException {
throw new SplittingNodeCreationFailedException ();
}
};