HBASE-624 Master will shut down if number of active region servers is zero even if shutdown was not requested

M HMaster

- Moved HMaster.quiescedMetaServers to ServerManager.quiescedServers; it was renamed because only servers serving user regions get quiesced.
- Removed HMaster.tableInCreation - not used

M ServerManager

- Don't check if quiescedServers.get() >= serversToServerInfo.size() unless master.shutdownRequested is true.

M HRegionServer

- Change order of checks in main loop of HRegionServer.run, so that booleans are checked before we check the number of messages to process
- Don't break out of the main loop if restart or stop is requested: stop is already checked at the top of the loop, and if restart was requested we need to continue rather than break out of the main loop.



git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@655962 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2008-05-13 18:25:31 +00:00
parent 197cf5c7d7
commit 8d5e1e7025
4 changed files with 31 additions and 30 deletions

View File

@ -37,6 +37,8 @@ Hbase Change Log
HBASE-453 undeclared throwable exception from HTable.get
HBASE-620 testmergetool failing in branch and trunk since hbase-618 went in
HBASE-622 Remove StaticTestEnvironment and put a log4j.properties in src/test
HBASE-624 Master will shut down if number of active region servers is zero
even if shutdown was not requested
IMPROVEMENTS
HBASE-559 MR example job to count table rows

View File

@ -21,18 +21,14 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -104,7 +100,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
// hosting class
volatile AtomicBoolean closed = new AtomicBoolean(true);
volatile boolean shutdownRequested = false;
volatile AtomicInteger quiescedMetaServers = new AtomicInteger(0);
volatile boolean fsOk = true;
final Path rootdir;
final HBaseConfiguration conf;
@ -142,10 +137,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
return infoServer;
}
/** Set of tables currently in creation. */
private volatile Set<Text> tableInCreation =
Collections.synchronizedSet(new HashSet<Text>());
ServerManager serverManager;
RegionManager regionManager;

View File

@ -28,6 +28,7 @@ import java.util.Set;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.Collections;
import org.apache.commons.logging.Log;
@ -50,6 +51,8 @@ import org.apache.hadoop.io.Text;
class ServerManager implements HConstants {
static final Log LOG = LogFactory.getLog(ServerManager.class.getName());
private final AtomicInteger quiescedServers = new AtomicInteger(0);
/** The map of known server names to server info */
final Map<String, HServerInfo> serversToServerInfo =
new ConcurrentHashMap<String, HServerInfo>();
@ -161,25 +164,27 @@ class ServerManager implements HConstants {
return new HMsg[0];
} else if (msgs[0].getMsg() == HMsg.MSG_REPORT_QUIESCED) {
LOG.info("Region server " + serverName + " quiesced");
master.quiescedMetaServers.incrementAndGet();
quiescedServers.incrementAndGet();
}
}
if(master.quiescedMetaServers.get() >= serversToServerInfo.size()) {
// If the only servers we know about are meta servers, then we can
// proceed with shutdown
LOG.info("All user tables quiesced. Proceeding with shutdown");
master.startShutdown();
}
if (master.shutdownRequested && !master.closed.get()) {
if (msgs.length > 0 && msgs[0].getMsg() == HMsg.MSG_REPORT_QUIESCED) {
// Server is already quiesced, but we aren't ready to shut down
// return empty response
return new HMsg[0];
if (master.shutdownRequested) {
if(quiescedServers.get() >= serversToServerInfo.size()) {
// If the only servers we know about are meta servers, then we can
// proceed with shutdown
LOG.info("All user tables quiesced. Proceeding with shutdown");
master.startShutdown();
}
if (!master.closed.get()) {
if (msgs.length > 0 && msgs[0].getMsg() == HMsg.MSG_REPORT_QUIESCED) {
// Server is already quiesced, but we aren't ready to shut down
// return empty response
return new HMsg[0];
}
// Tell the server to stop serving any user regions
return new HMsg[]{new HMsg(HMsg.MSG_REGIONSERVER_QUIESCE)};
}
// Tell the server to stop serving any user regions
return new HMsg[]{new HMsg(HMsg.MSG_REGIONSERVER_QUIESCE)};
}
if (master.closed.get()) {

View File

@ -306,8 +306,10 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// Queue up the HMaster's instruction stream for processing
boolean restart = false;
for(int i = 0; i < msgs.length && !stopRequested.get() &&
!restart; i++) {
for(int i = 0;
!restart && !stopRequested.get() && i < msgs.length;
i++) {
switch(msgs[i].getMsg()) {
case HMsg.MSG_CALL_SERVER_STARTUP:
@ -370,12 +372,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
}
}
}
if (restart || this.stopRequested.get()) {
toDo.clear();
break;
}
// Reset tries count if we had a successful transaction.
tries = 0;
if (restart || this.stopRequested.get()) {
toDo.clear();
continue;
}
} catch (Exception e) {
if (e instanceof IOException) {
e = RemoteExceptionHandler.checkIOException((IOException) e);