From 14b68ca4c0b21ced1f4ff2bc484e9b2c0f1a17cb Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 5 May 2010 22:14:05 +0000 Subject: [PATCH] HBASE-2513 hbase-2414 added bug where we'd tight-loop if no root available git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@941517 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/hbase/master/HMaster.java | 9 ++++++--- .../master/RegionServerOperationQueue.java | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/core/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index ed31cbcd996..5bc98b9e1e8 100644 --- a/core/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/core/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -445,14 +445,17 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, break; } } - boolean doDelayQueue = this.regionManager.getRootRegionLocation() != null; - switch (this.regionServerOperationQueue.process(doDelayQueue)) { + final HServerAddress root = this.regionManager.getRootRegionLocation(); + switch (this.regionServerOperationQueue.process(root)) { case FAILED: + // If FAILED op processing, bad. Exit. break FINISHED; case REQUEUED_BUT_PROBLEM: if (!checkFileSystem()) + // If bad filesystem, exit. break FINISHED; - default: // PROCESSED, NOOP, REQUEUED: + default: + // Continue run loop if conditions are PROCESSED, NOOP, REQUEUED break; } } diff --git a/core/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java b/core/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java index 53b0bd787d0..c85c1414b08 100644 --- a/core/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java +++ b/core/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java @@ -33,6 +33,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.util.Sleeper; import org.apache.hadoop.ipc.RemoteException; @@ -109,19 +110,20 @@ public class RegionServerOperationQueue { /** * Try to get an operation off of the queue and process it. - * @param skipDelayedToDos If true, do not do delayed todos first but instead - * move straight to the current todos list. This is set when we want to be - * sure that recently queued events are processed first such as the onlining - * of root region (Root region needs to be online before we can do meta - * onlining; meta onlining needs to be done before we can do... and so on). + * @param rootRegionLocation Location of the root region. * @return {@link ProcessingResultCode#PROCESSED}, * {@link ProcessingResultCode#REQUEUED}, * {@link ProcessingResultCode#REQUEUED_BUT_PROBLEM} */ - public synchronized ProcessingResultCode process(final boolean skipDelayedToDos) { - RegionServerOperation op = delayedToDoQueue.poll(); + public synchronized ProcessingResultCode process(final HServerAddress rootRegionLocation) { + RegionServerOperation op = null; + // Only process the delayed queue if root region is online. If offline, + // the operation to put it online is probably in the toDoQueue. Process + // it first. + if (rootRegionLocation != null) { + op = delayedToDoQueue.poll(); + } else { // if there aren't any todo items in the queue, sleep for a bit. - if (op == null) { try { op = toDoQueue.poll(threadWakeFrequency, TimeUnit.MILLISECONDS); } catch (InterruptedException e) {