HBASE-3332 Regions stuck in transition after RS failure

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1049238 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Gray 2010-12-14 20:01:03 +00:00
parent c0c723f660
commit 36be388220
3 changed files with 17 additions and 9 deletions

View File

@ -21,6 +21,8 @@ Release 0.91.0 - Unreleased
IllegalArgumentException during parameter sanity check IllegalArgumentException during parameter sanity check
HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new
master master
HBASE-3332 Regions stuck in transition after RS failure
IMPROVEMENTS IMPROVEMENTS
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via

View File

@ -1654,9 +1654,9 @@ public class AssignmentManager extends ZooKeeperListener {
/** /**
* Process shutdown server removing any assignments. * Process shutdown server removing any assignments.
* @param hsi Server that went down. * @param hsi Server that went down.
* @return set of regions on this server that are not in transition * @return list of regions in transition on this server
*/ */
public List<HRegionInfo> processServerShutdown(final HServerInfo hsi) { public List<RegionState> processServerShutdown(final HServerInfo hsi) {
// Clean out any existing assignment plans for this server // Clean out any existing assignment plans for this server
synchronized (this.regionPlans) { synchronized (this.regionPlans) {
for (Iterator <Map.Entry<String, RegionPlan>> i = for (Iterator <Map.Entry<String, RegionPlan>> i =
@ -1672,7 +1672,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Remove this server from map of servers to regions, and remove all regions // Remove this server from map of servers to regions, and remove all regions
// of this server from online map of regions. // of this server from online map of regions.
Set<HRegionInfo> deadRegions = null; Set<HRegionInfo> deadRegions = null;
List<HRegionInfo> rits = new ArrayList<HRegionInfo>(); List<RegionState> rits = new ArrayList<RegionState>();
synchronized (this.regions) { synchronized (this.regions) {
List<HRegionInfo> assignedRegions = this.servers.remove(hsi); List<HRegionInfo> assignedRegions = this.servers.remove(hsi);
if (assignedRegions == null || assignedRegions.isEmpty()) { if (assignedRegions == null || assignedRegions.isEmpty()) {
@ -1690,7 +1690,7 @@ public class AssignmentManager extends ZooKeeperListener {
synchronized (regionsInTransition) { synchronized (regionsInTransition) {
for (RegionState region : this.regionsInTransition.values()) { for (RegionState region : this.regionsInTransition.values()) {
if (deadRegions.remove(region.getRegion())) { if (deadRegions.remove(region.getRegion())) {
rits.add(region.getRegion()); rits.add(region);
} }
} }
} }

View File

@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.DeadServer; import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.util.Writables;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
@ -98,7 +99,7 @@ public class ServerShutdownHandler extends EventHandler {
// doing after log splitting. Could do some states before -- OPENING? // doing after log splitting. Could do some states before -- OPENING?
// OFFLINE? -- and then others after like CLOSING that depend on log // OFFLINE? -- and then others after like CLOSING that depend on log
// splitting. // splitting.
List<HRegionInfo> regionsInTransition = List<RegionState> regionsInTransition =
this.services.getAssignmentManager().processServerShutdown(this.hsi); this.services.getAssignmentManager().processServerShutdown(this.hsi);
// Assign root and meta if we were carrying them. // Assign root and meta if we were carrying them.
@ -133,11 +134,16 @@ public class ServerShutdownHandler extends EventHandler {
} }
} }
// Remove regions that were in transition // Skip regions that were in transition unless CLOSING or PENDING_CLOSE
for (HRegionInfo rit : regionsInTransition) hris.remove(rit); for (RegionState rit : regionsInTransition) {
LOG.info("Reassigning the " + hris.size() + " region(s) that " + serverName if (!rit.isClosing() && !rit.isPendingClose()) {
hris.remove(rit.getRegion());
}
}
LOG.info("Reassigning " + hris.size() + " region(s) that " + serverName
+ " was carrying (skipping " + regionsInTransition.size() + + " was carrying (skipping " + regionsInTransition.size() +
" regions(s) that are in transition)"); " regions(s) that are already in transition)");
// Iterate regions that were on this server and assign them // Iterate regions that were on this server and assign them
for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) { for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {