HBASE-3263 Stack overflow in AssignmentManager

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1040360 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-11-30 00:42:03 +00:00
parent c849b4c26d
commit 1674a7ee41
1 changed file with 31 additions and 45 deletions

View File

@ -96,7 +96,7 @@ public class AssignmentManager extends ZooKeeperListener {
/* /*
* Maximum times we recurse an assignment. See below in {@link #assign()}. * Maximum times we recurse an assignment. See below in {@link #assign()}.
*/ */
private final int maximumAssignmentRecursions; private final int maximumAssignmentAttempts;
/** /**
* Regions currently in transition. Map of encoded region names to the master * Regions currently in transition. Map of encoded region names to the master
@ -163,8 +163,8 @@ public class AssignmentManager extends ZooKeeperListener {
Threads.setDaemonThreadRunning(timeoutMonitor, Threads.setDaemonThreadRunning(timeoutMonitor,
master.getServerName() + ".timeoutMonitor"); master.getServerName() + ".timeoutMonitor");
this.zkTable = new ZKTable(this.master.getZooKeeper()); this.zkTable = new ZKTable(this.master.getZooKeeper());
this.maximumAssignmentRecursions = this.maximumAssignmentAttempts =
this.master.getConfiguration().getInt("hbase.assignment.maximum.recursions", 10); this.master.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
} }
/** /**
@ -823,51 +823,37 @@ public class AssignmentManager extends ZooKeeperListener {
*/ */
private void assign(final RegionState state, final boolean setOfflineInZK, private void assign(final RegionState state, final boolean setOfflineInZK,
final boolean forceNewPlan) { final boolean forceNewPlan) {
assign(state, setOfflineInZK, forceNewPlan, new AtomicInteger(0)); for (int i = 0; i < this.maximumAssignmentAttempts; i++) {
} if (setOfflineInZK && !setOfflineInZooKeeper(state)) return;
if (this.master.isStopped()) {
/** LOG.debug("Server stopped; skipping assign of " + state);
* Caller must hold lock on the passed <code>state</code> object.
* @param state
* @param setOfflineInZK
* @param forceNewPlan
* @param recursions Keep a count so can have upper bound on recursions.
*/
private void assign(final RegionState state, final boolean setOfflineInZK,
final boolean forceNewPlan, final int recursions) {
if (setOfflineInZK && !setOfflineInZooKeeper(state)) return;
if (this.master.isStopped()) {
LOG.debug("Server stopped; skipping assign of " + state);
return;
}
RegionPlan plan = getRegionPlan(state, forceNewPlan);
if (plan == null) return; // Should get reassigned later when RIT times out.
try {
LOG.debug("Assigning region " + state.getRegion().getRegionNameAsString() +
" to " + plan.getDestination().getServerName());
// Transition RegionState to PENDING_OPEN
state.update(RegionState.State.PENDING_OPEN);
// Send OPEN RPC. This can fail if the server on other end is is not up.
serverManager.sendRegionOpen(plan.getDestination(), state.getRegion());
} catch (Throwable t) {
LOG.warn("Failed assignment of " +
state.getRegion().getRegionNameAsString() + " to " +
plan.getDestination() + ", trying to assign elsewhere instead; retry=" +
recursions, t);
// Clean out plan we failed execute and one that doesn't look like it'll
// succeed anyways; we need a new plan!
// Transition back to OFFLINE
state.update(RegionState.State.OFFLINE);
// Force a new plan and reassign. Will return null if no servers.
if (getRegionPlan(state, plan.getDestination(), true) == null) {
LOG.warn("Unable to find a viable location to assign region " +
state.getRegion().getRegionNameAsString());
return; return;
} }
if (recursions < this.maximumAssignmentRecursions) { RegionPlan plan = getRegionPlan(state, forceNewPlan);
assign(state, false, false, recursions + 1); if (plan == null) return; // Should get reassigned later when RIT times out.
try {
LOG.debug("Assigning region " + state.getRegion().getRegionNameAsString() +
" to " + plan.getDestination().getServerName());
// Transition RegionState to PENDING_OPEN
state.update(RegionState.State.PENDING_OPEN);
// Send OPEN RPC. This can fail if the server on the other end is not up.
serverManager.sendRegionOpen(plan.getDestination(), state.getRegion());
} catch (Throwable t) {
LOG.warn("Failed assignment of " +
state.getRegion().getRegionNameAsString() + " to " +
plan.getDestination() + ", trying to assign elsewhere instead; " +
"retry=" + i, t);
// Clean out the plan we failed to execute, since it doesn't look like
// it'll succeed anyway; we need a new plan!
// Transition back to OFFLINE
state.update(RegionState.State.OFFLINE);
// Force a new plan and reassign. Will return null if no servers.
if (getRegionPlan(state, plan.getDestination(), true) == null) {
LOG.warn("Unable to find a viable location to assign region " +
state.getRegion().getRegionNameAsString());
return;
}
} }
// Else Just leave the region in RIT. On timeout, we'll retry later.
} }
} }