HBASE-3332 Regions stuck in transition after RS failure

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1049238 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Gray 2010-12-14 20:01:03 +00:00
parent c0c723f660
commit 36be388220
3 changed files with 17 additions and 9 deletions

View File

@@ -21,6 +21,8 @@ Release 0.91.0 - Unreleased
IllegalArgumentException during parameter sanity check
HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new
master
HBASE-3332 Regions stuck in transition after RS failure
IMPROVEMENTS
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via

View File

@@ -1654,9 +1654,9 @@ public class AssignmentManager extends ZooKeeperListener {
/**
* Process shutdown server removing any assignments.
* @param hsi Server that went down.
* @return set of regions on this server that are not in transition
* @return list of regions in transition on this server
*/
public List<HRegionInfo> processServerShutdown(final HServerInfo hsi) {
public List<RegionState> processServerShutdown(final HServerInfo hsi) {
// Clean out any existing assignment plans for this server
synchronized (this.regionPlans) {
for (Iterator <Map.Entry<String, RegionPlan>> i =
@@ -1672,7 +1672,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Remove this server from map of servers to regions, and remove all regions
// of this server from online map of regions.
Set<HRegionInfo> deadRegions = null;
List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
List<RegionState> rits = new ArrayList<RegionState>();
synchronized (this.regions) {
List<HRegionInfo> assignedRegions = this.servers.remove(hsi);
if (assignedRegions == null || assignedRegions.isEmpty()) {
@@ -1690,7 +1690,7 @@ public class AssignmentManager extends ZooKeeperListener {
synchronized (regionsInTransition) {
for (RegionState region : this.regionsInTransition.values()) {
if (deadRegions.remove(region.getRegion())) {
rits.add(region.getRegion());
rits.add(region);
}
}
}

View File

@@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.zookeeper.KeeperException;
@@ -98,7 +99,7 @@ public class ServerShutdownHandler extends EventHandler {
// doing after log splitting. Could do some states before -- OPENING?
// OFFLINE? -- and then others after like CLOSING that depend on log
// splitting.
List<HRegionInfo> regionsInTransition =
List<RegionState> regionsInTransition =
this.services.getAssignmentManager().processServerShutdown(this.hsi);
// Assign root and meta if we were carrying them.
@@ -133,11 +134,16 @@ public class ServerShutdownHandler extends EventHandler {
}
}
// Remove regions that were in transition
for (HRegionInfo rit : regionsInTransition) hris.remove(rit);
LOG.info("Reassigning the " + hris.size() + " region(s) that " + serverName
// Skip regions that were in transition unless CLOSING or PENDING_CLOSE
for (RegionState rit : regionsInTransition) {
if (!rit.isClosing() && !rit.isPendingClose()) {
hris.remove(rit.getRegion());
}
}
LOG.info("Reassigning " + hris.size() + " region(s) that " + serverName
+ " was carrying (skipping " + regionsInTransition.size() +
" regions(s) that are in transition)");
" regions(s) that are already in transition)");
// Iterate regions that were on this server and assign them
for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {