HBASE-964, HBASE-678 provide for safe-mode without locking up HBase "waiting for root region"

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@712722 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2008-11-10 19:03:01 +00:00
parent d71535eca7
commit 1a0fc48508
5 changed files with 43 additions and 22 deletions

View File

@ -66,7 +66,8 @@ Release 0.19.0 - Unreleased
HBASE-984 Fix javadoc warnings
HBASE-985 Fix javadoc warnings
HBASE-951 Either shut down master or let it finish cleanup
HBASE-964 Startup stuck "waiting for root region"
HBASE-964, HBASE-678 provide for safe-mode without locking up HBase "waiting
for root region"
IMPROVEMENTS
HBASE-901 Add a limit to key length, check key and value length on client side

View File

@ -132,7 +132,7 @@ public class HConnectionManager implements HConstants {
private final Map<String, HRegionInterface> servers =
new ConcurrentHashMap<String, HRegionInterface>();
private HRegionLocation rootRegionLocation;
private volatile HRegionLocation rootRegionLocation;
private final Map<Integer, SoftValueSortedMap<byte [], HRegionLocation>>
cachedRegionLocations = Collections.synchronizedMap(

View File

@ -800,7 +800,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
public HServerAddress findRootRegion() {
HServerAddress rootServer = null;
if (regionManager.isInitialMetaScanComplete()) {
if (!regionManager.inSafeMode()) {
rootServer = regionManager.getRootRegionLocation();
}
return rootServer;

View File

@ -62,6 +62,8 @@ class RegionManager implements HConstants {
private volatile AtomicReference<HServerAddress> rootRegionLocation =
new AtomicReference<HServerAddress>(null);
private volatile boolean safeMode = true;
final Lock splitLogLock = new ReentrantLock();
private final RootScanner rootScannerThread;
@ -190,7 +192,7 @@ class RegionManager implements HConstants {
Set<HRegionInfo> regionsToAssign = regionsAwaitingAssignment();
if (regionsToAssign.size() == 0) {
// There are no regions waiting to be assigned.
if (allRegionsAssigned()) {
if (!inSafeMode()) {
// We only do load balancing once all regions are assigned.
// This prevents churn while the cluster is starting up.
double avgLoad = master.serverManager.getAverageLoad();
@ -860,9 +862,17 @@ class RegionManager implements HConstants {
* @return true if the initial meta scan is complete and there are no
* unassigned or pending regions
*/
public boolean allRegionsAssigned() {
return isInitialMetaScanComplete() && unassignedRegions.size() == 0 &&
pendingRegions.size() == 0;
public boolean inSafeMode() {
// Reports whether the safeMode flag is still set. While the flag is set,
// every call re-evaluates the exit condition below and clears the flag the
// first time that condition holds.
// Returns true while the flag is set; returns false from the call that
// clears it and from every later call (nothing in this method sets the
// flag back to true).
// NOTE(review): the javadoc directly above this method appears to be the
// @return text of the replaced allRegionsAssigned(); its polarity looks
// inverted for this method - confirm and update.
// NOTE(review): the test-and-clear below is not guarded by a lock in this
// method; if callers can run concurrently, "exiting safe mode" could
// presumably be logged more than once - confirm whether that matters.
if (safeMode) {
// Exit condition: the initial meta scan is complete and both
// unassignedRegions and pendingRegions are empty.
if(isInitialMetaScanComplete() && unassignedRegions.size() == 0 &&
pendingRegions.size() == 0) {
safeMode = false;
LOG.info("exiting safe mode");
} else {
// Logged at info level on every call made while the flag is still set.
LOG.info("in safe mode");
}
}
return safeMode;
}
/**

View File

@ -123,6 +123,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
protected final HBaseConfiguration conf;
private final ServerConnection connection;
private final AtomicBoolean haveRootRegion = new AtomicBoolean(false);
private FileSystem fs;
private Path rootDir;
private final Random rand = new Random();
@ -303,23 +304,22 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
boolean quiesceRequested = false;
// A sleeper that sleeps for msgInterval.
Sleeper sleeper = new Sleeper(this.msgInterval, this.stopRequested);
boolean haveRootRegion = false;
try {
init(reportForDuty(sleeper));
long lastMsg = 0;
// Now ask master what it wants us to do and tell it what we have done
for (int tries = 0; !stopRequested.get() && isHealthy();) {
// Try to get the root region location from the master.
if (!haveRootRegion) {
if (!haveRootRegion.get()) {
HServerAddress rootServer = hbaseMaster.getRootRegionLocation();
if (rootServer != null) {
// By setting the root region location, we bypass the wait imposed on
// HTable for all regions being assigned.
this.connection.setRootRegionLocation(
new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, rootServer));
haveRootRegion = true;
haveRootRegion.set(true);
}
}
long lastMsg = 0;
// Now ask master what it wants us to do and tell it what we have done
for (int tries = 0; !stopRequested.get() && isHealthy();) {
long now = System.currentTimeMillis();
if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
// It has been way too long since we last reported to the master.
@ -890,6 +890,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
continue;
}
LOG.info(e.msg);
HRegionInfo info = e.msg.getRegionInfo();
switch(e.msg.getType()) {
case MSG_REGIONSERVER_QUIESCE:
@ -898,7 +899,18 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
case MSG_REGION_OPEN:
// Open a region
openRegion(e.msg.getRegionInfo());
if (!haveRootRegion.get() && !info.isRootRegion()) {
// root region is not online yet. requeue this task
LOG.info("putting region open request back into queue because" +
" root region is not yet available");
try {
toDo.put(e);
} catch (InterruptedException ex) {
LOG.warn("insertion into toDo queue was interrupted", ex);
break;
}
}
openRegion(info);
break;
case MSG_REGION_CLOSE:
@ -912,7 +924,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
break;
case MSG_REGION_SPLIT: {
HRegionInfo info = e.msg.getRegionInfo();
// Force split a region
HRegion region = getRegion(info.getRegionName());
region.regionInfo.shouldSplit(true);
@ -921,7 +932,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
case MSG_REGION_COMPACT: {
// Compact a region
HRegionInfo info = e.msg.getRegionInfo();
HRegion region = getRegion(info.getRegionName());
compactSplitThread.compactionRequested(region);
} break;