HBASE-1457 Taking down ROOT/META regionserver can result in cluster becoming in-operational

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@780436 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2009-05-31 14:32:56 +00:00
parent 3946cd29c0
commit e49a037aeb
13 changed files with 340 additions and 169 deletions

View File

@ -156,6 +156,8 @@ Release 0.20.0 - Unreleased
HBASE-1395 InfoServers no longer put up a UI
HBASE-1302 When a new master comes up, regionservers should continue with
their region assignments from the last master
HBASE-1457 Taking down ROOT/META regionserver can result in cluster
becoming in-operational (Ryan Rawson via Stack)
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage

View File

@ -344,7 +344,7 @@ public class HConnectionManager implements HConstants {
rowResult.get(COL_REGIONINFO));
// Only examine the rows where the startKey is zero length
if (info.getStartKey().length == 0) {
if (info != null && info.getStartKey().length == 0) {
uniqueTables.add(info.getTableDesc());
}
return true;
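
Presumably the added null check guards against a catalog row whose REGIONINFO cell is empty or fails to deserialize, in which case info is null and the startKey dereference would throw while listing tables. A self-contained sketch of the same defensive pattern; parse() and the sample rows are illustrative stand-ins, not HBase API:

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class NullGuardDemo {
  // Illustrative stand-in for deserializing HRegionInfo out of a catalog row;
  // returns null when the REGIONINFO cell is missing or empty.
  static String parse(byte[] cell) {
    return (cell == null || cell.length == 0) ? null : new String(cell);
  }

  public static void main(String[] args) {
    List<byte[]> rows = new ArrayList<byte[]>();
    rows.add("table-a,,1243000000000".getBytes());
    rows.add(new byte[0]);                         // damaged or empty REGIONINFO cell
    rows.add("table-b,,1243000000001".getBytes());

    Set<String> tables = new LinkedHashSet<String>();
    for (byte[] cell : rows) {
      String info = parse(cell);
      if (info != null) {                          // the guard this hunk adds
        tables.add(info.split(",")[0]);
      }
    }
    System.out.println(tables);                    // prints [table-a, table-b]
  }
}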

View File

@ -65,7 +65,7 @@ import org.apache.hadoop.util.ReflectionUtils;
public class HBaseClient {
public static final Log LOG =
LogFactory.getLog("org.apache.hadoop.ipc.HBaseClass");
LogFactory.getLog("org.apache.hadoop.ipc.HBaseClient");
protected Hashtable<ConnectionId, Connection> connections =
new Hashtable<ConnectionId, Connection>();

View File

@ -34,6 +34,7 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
@ -124,7 +125,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
volatile DelayQueue<RegionServerOperation> delayedToDoQueue =
new DelayQueue<RegionServerOperation>();
volatile BlockingQueue<RegionServerOperation> toDoQueue =
new LinkedBlockingQueue<RegionServerOperation>();
new PriorityBlockingQueue<RegionServerOperation>();
private final HBaseServer server;
private final HServerAddress address;
@ -235,6 +236,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
// The rpc-server port can be ephemeral... ensure we have the correct info
this.address = new HServerAddress(server.getListenerAddress());
// dont retry too much
conf.setInt("hbase.client.retries.number", 3);
this.connection = ServerConnectionManager.getConnection(conf);
this.metaRescanInterval =
@ -494,15 +498,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
return false;
}
LOG.warn("Processing pending operations: " + op.toString(), ex);
try {
// put the operation back on the queue... maybe it'll work next time.
toDoQueue.put(op);
} catch (InterruptedException e) {
throw new RuntimeException(
"Putting into toDoQueue was interrupted.", e);
} catch (Exception e) {
LOG.error("main processing loop: " + op.toString(), e);
}
delayedToDoQueue.put(op);
}
return true;
}
@ -549,7 +545,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
else if(region.isMetaRegion()) {
MetaRegion m =
new MetaRegion(new HServerAddress(address),
region.getRegionName(), region.getStartKey());
region);
regionManager.addMetaRegionToScan(m);
}
assignedRegions.put(region.getRegionName(), region);
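
Two behavioural changes meet in this file: a failed operation is now parked on delayedToDoQueue instead of being pushed straight back onto toDoQueue, and toDoQueue itself becomes priority-ordered (see the RegionServerOperation hunk further down). A minimal, self-contained sketch of the DelayQueue side, with an illustrative fixed back-off; the real delay bookkeeping lives in RegionServerOperation:

import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;

// Illustrative stand-in for a RegionServerOperation parked for a later retry.
class DelayedOp implements Delayed {
  private final String name;
  private final long expireAtMs;

  DelayedOp(String name, long delayMs) {
    this.name = name;
    this.expireAtMs = System.currentTimeMillis() + delayMs;
  }

  public long getDelay(TimeUnit unit) {
    return unit.convert(expireAtMs - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
  }

  public int compareTo(Delayed other) {
    long diff = getDelay(TimeUnit.MILLISECONDS) - other.getDelay(TimeUnit.MILLISECONDS);
    return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
  }

  public String toString() { return name; }
}

public class DelayQueueDemo {
  public static void main(String[] args) throws InterruptedException {
    DelayQueue<DelayedOp> delayedToDo = new DelayQueue<DelayedOp>();
    // A failed operation is parked here instead of spinning on the main queue.
    delayedToDo.put(new DelayedOp("retry ProcessRegionOpen", 1000));
    // take() blocks until the delay elapses, giving ROOT/.META. time to come back.
    System.out.println("ready again: " + delayedToDo.take());
  }
}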

View File

@ -23,42 +23,36 @@ import java.util.Arrays;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.util.Bytes;
/** Describes a meta region and its server */
public class MetaRegion implements Comparable<MetaRegion> {
private final HServerAddress server;
private final byte [] regionName;
private final byte [] startKey;
private HRegionInfo regionInfo;
MetaRegion(final HServerAddress server, final byte [] regionName) {
this (server, regionName, HConstants.EMPTY_START_ROW);
}
MetaRegion(final HServerAddress server, final byte [] regionName,
final byte [] startKey) {
MetaRegion(final HServerAddress server, HRegionInfo regionInfo) {
if (server == null) {
throw new IllegalArgumentException("server cannot be null");
}
this.server = server;
if (regionName == null) {
throw new IllegalArgumentException("regionName cannot be null");
if (regionInfo == null) {
throw new IllegalArgumentException("regionInfo cannot be null");
}
this.regionName = regionName;
this.startKey = startKey;
this.regionInfo = regionInfo;
}
@Override
public String toString() {
return "{regionname: " + Bytes.toString(this.regionName) +
", startKey: <" + Bytes.toString(this.startKey) +
">, server: " + this.server.toString() + "}";
return "{server: " + this.server.toString() + ", regionname: " +
regionInfo.getRegionNameAsString() + ", startKey: <" +
Bytes.toString(regionInfo.getStartKey()) + ">}";
}
/** @return the regionName */
public byte [] getRegionName() {
return regionName;
return regionInfo.getRegionName();
}
/** @return the server */
@ -68,7 +62,11 @@ public class MetaRegion implements Comparable<MetaRegion> {
/** @return the startKey */
public byte [] getStartKey() {
return startKey;
return regionInfo.getStartKey();
}
public HRegionInfo getRegionInfo() {
return regionInfo;
}
@Override
@ -78,22 +76,17 @@ public class MetaRegion implements Comparable<MetaRegion> {
@Override
public int hashCode() {
int result = Arrays.hashCode(this.regionName);
result ^= Arrays.hashCode(this.startKey);
return result;
return regionInfo.hashCode();
}
// Comparable
public int compareTo(MetaRegion other) {
int result = Bytes.compareTo(this.regionName, other.getRegionName());
if(result == 0) {
result = Bytes.compareTo(this.startKey, other.getStartKey());
if (result == 0) {
// Might be on different host?
result = this.server.compareTo(other.server);
}
int cmp = regionInfo.compareTo(other.regionInfo);
if(cmp == 0) {
// Might be on different host?
cmp = this.server.compareTo(other.server);
}
return result;
return cmp;
}
}
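
The rewritten MetaRegion keeps a single HRegionInfo instead of copied regionName/startKey fields, so ordering, hashing and display all come from one source, with the hosting server only used as a tie-break. A small sketch of that delegate-and-tie-break pattern with neutral, illustrative names:

import java.util.TreeSet;

// Illustrative wrapped value: ordering and hashing live here, like HRegionInfo.
class Info implements Comparable<Info> {
  final String name;
  Info(String name) { this.name = name; }
  public int compareTo(Info other) { return name.compareTo(other.name); }
  public int hashCode() { return name.hashCode(); }
  public String toString() { return name; }
}

// Illustrative wrapper: delegates to Info and breaks ties on the hosting server,
// mirroring the new MetaRegion.compareTo()/hashCode().
class Holder implements Comparable<Holder> {
  final Info info;
  final String server;
  Holder(Info info, String server) { this.info = info; this.server = server; }
  public int compareTo(Holder other) {
    int cmp = info.compareTo(other.info);
    return cmp != 0 ? cmp : server.compareTo(other.server);
  }
  public int hashCode() { return info.hashCode(); }
  public String toString() { return "{server: " + server + ", info: " + info + "}"; }
}

public class HolderDemo {
  public static void main(String[] args) {
    TreeSet<Holder> sorted = new TreeSet<Holder>();
    sorted.add(new Holder(new Info(".META.,,1"), "host-b:60020"));
    sorted.add(new Holder(new Info(".META.,,1"), "host-a:60020"));
    // The same region reported from two hosts stays distinguishable and stably ordered.
    System.out.println(sorted);
  }
}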

View File

@ -25,6 +25,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RegionHistorian;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Bytes;
@ -58,67 +59,68 @@ class ProcessRegionOpen extends ProcessRegionStatusChange {
@Override
protected boolean process() throws IOException {
Boolean result =
new RetryableMetaOperation<Boolean>(getMetaRegion(), this.master) {
private final RegionHistorian historian = RegionHistorian.getInstance();
public Boolean call() throws IOException {
LOG.info(regionInfo.getRegionNameAsString() + " open on " +
serverInfo.getServerAddress().toString());
if (!metaRegionAvailable()) {
// We can't proceed unless the meta region we are going to update
// is online. metaRegionAvailable() has put this operation on the
// delayedToDoQueue, so return true so the operation is not put
// back on the toDoQueue
return true;
}
if (!metaRegionAvailable()) {
// We can't proceed unless the meta region we are going to update
// is online. metaRegionAvailable() has put this operation on the
// delayedToDoQueue, so return true so the operation is not put
// back on the toDoQueue
return true;
}
// Register the newly-available Region's location.
LOG.info("updating row " + regionInfo.getRegionNameAsString() +
" in region " + Bytes.toString(metaRegionName) + " with " +
" with startcode " + serverInfo.getStartCode() + " and server " +
serverInfo.getServerAddress());
BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
b.put(COL_SERVER,
Bytes.toBytes(serverInfo.getServerAddress().toString()));
b.put(COL_STARTCODE, Bytes.toBytes(serverInfo.getStartCode()));
server.batchUpdate(metaRegionName, b, -1L);
if (!this.historian.isOnline()) {
// This is safest place to do the onlining of the historian in
// the master. When we get to here, we know there is a .META.
// for the historian to go against.
this.historian.online(this.master.getConfiguration());
final RegionHistorian historian = RegionHistorian.getInstance();
HRegionInterface server =
master.connection.getHRegionConnection(getMetaRegion().getServer());
LOG.info(regionInfo.getRegionNameAsString() + " open on " +
serverInfo.getServerAddress().toString());
// Register the newly-available Region's location.
LOG.info("updating row " + regionInfo.getRegionNameAsString() +
" in region " + Bytes.toString(metaRegionName) + " with " +
" with startcode " + serverInfo.getStartCode() + " and server " +
serverInfo.getServerAddress());
BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
b.put(COL_SERVER,
Bytes.toBytes(serverInfo.getServerAddress().toString()));
b.put(COL_STARTCODE, Bytes.toBytes(serverInfo.getStartCode()));
server.batchUpdate(metaRegionName, b, -1L);
if (!historian.isOnline()) {
// This is safest place to do the onlining of the historian in
// the master. When we get to here, we know there is a .META.
// for the historian to go against.
historian.online(this.master.getConfiguration());
}
historian.addRegionOpen(regionInfo, serverInfo.getServerAddress());
synchronized (master.regionManager) {
if (isMetaTable) {
// It's a meta region.
MetaRegion m =
new MetaRegion(new HServerAddress(serverInfo.getServerAddress()),
regionInfo);
if (!master.regionManager.isInitialMetaScanComplete()) {
// Put it on the queue to be scanned for the first time.
if (LOG.isDebugEnabled()) {
LOG.debug("Adding " + m.toString() + " to regions to scan");
}
this.historian.addRegionOpen(regionInfo, serverInfo.getServerAddress());
synchronized (master.regionManager) {
if (isMetaTable) {
// It's a meta region.
MetaRegion m =
new MetaRegion(new HServerAddress(serverInfo.getServerAddress()),
regionInfo.getRegionName(), regionInfo.getStartKey());
if (!master.regionManager.isInitialMetaScanComplete()) {
// Put it on the queue to be scanned for the first time.
if (LOG.isDebugEnabled()) {
LOG.debug("Adding " + m.toString() + " to regions to scan");
}
master.regionManager.addMetaRegionToScan(m);
} else {
// Add it to the online meta regions
if (LOG.isDebugEnabled()) {
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
}
master.regionManager.putMetaRegionOnline(m);
// Interrupting the Meta Scanner sleep so that it can
// process regions right away
master.regionManager.metaScannerThread.interrupt();
}
}
// If updated successfully, remove from pending list.
master.regionManager.removeRegion(regionInfo);
return true;
master.regionManager.addMetaRegionToScan(m);
} else {
// Add it to the online meta regions
if (LOG.isDebugEnabled()) {
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
}
master.regionManager.putMetaRegionOnline(m);
// Interrupting the Meta Scanner sleep so that it can
// process regions right away
master.regionManager.metaScannerThread.interrupt();
}
}.doWithRetries();
return result == null ? true : result;
}
// If updated successfully, remove from pending list.
master.regionManager.removeRegion(regionInfo);
return true;
}
}
@Override
protected int getPriority() {
return 0; // highest priority
}
}
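
ProcessRegionOpen no longer wraps the .META. update in RetryableMetaOperation; instead, if metaRegionAvailable() finds the target region offline it parks the operation on the delayed queue, and process() returns true so the worker does not also re-queue it. A compressed sketch of that return-value contract, with illustrative stand-ins for the master's queues:

import java.util.ArrayDeque;
import java.util.Queue;

public class RequeueContractDemo {
  // Illustrative stand-in for the master's delayedToDoQueue.
  static final Queue<Runnable> delayedToDo = new ArrayDeque<Runnable>();

  // Returns true when the work is finished OR already parked elsewhere,
  // so the worker loop never double-queues it on the main toDoQueue.
  static boolean process(boolean metaOnline) {
    if (!metaOnline) {
      // In the real code, metaRegionAvailable() does this parking for us.
      delayedToDo.add(new Runnable() { public void run() { process(true); } });
      return true;
    }
    System.out.println("updated .META. row for the newly opened region");
    return true;
  }

  public static void main(String[] args) {
    process(false);                              // .META. offline: parked for later
    System.out.println("parked operations: " + delayedToDo.size());
    delayedToDo.poll().run();                    // later, .META. is back online
  }
}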

View File

@ -31,7 +31,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation {
protected final HRegionInfo regionInfo;
private volatile MetaRegion metaRegion = null;
protected volatile byte[] metaRegionName = null;
/**
* @param master
* @param regionInfo
@ -47,6 +47,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation {
if (isMetaTable) {
// This operation is for the meta table
if (!rootAvailable()) {
requeue();
// But we can't proceed unless the root region is available
available = false;
}
@ -67,7 +68,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation {
if (isMetaTable) {
this.metaRegionName = HRegionInfo.ROOT_REGIONINFO.getRegionName();
this.metaRegion = new MetaRegion(master.getRootRegionLocation(),
this.metaRegionName, HConstants.EMPTY_START_ROW);
HRegionInfo.ROOT_REGIONINFO);
} else {
this.metaRegion =
master.regionManager.getFirstMetaRegionForRegion(regionInfo);

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
@ -44,12 +45,14 @@ import org.apache.hadoop.hbase.io.RowResult;
*/
class ProcessServerShutdown extends RegionServerOperation {
private final String deadServer;
private final boolean rootRegionServer;
private boolean rootRegionReassigned = false;
private boolean isRootServer;
private List<MetaRegion> metaRegions;
private Path oldLogDir;
private boolean logSplit;
private boolean rootRescanned;
private HServerAddress deadServerAddress;
private static class ToDoEntry {
boolean regionOffline;
@ -66,17 +69,33 @@ class ProcessServerShutdown extends RegionServerOperation {
/**
* @param master
* @param serverInfo
* @param rootRegionServer
*/
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo,
boolean rootRegionServer) {
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
super(master);
this.deadServer = HServerInfo.getServerName(serverInfo);
this.rootRegionServer = rootRegionServer;
this.deadServerAddress = serverInfo.getServerAddress();
this.logSplit = false;
this.rootRescanned = false;
this.oldLogDir =
new Path(master.rootdir, HLog.getHLogDirectoryName(serverInfo));
// Check whether the dead server was hosting ROOT or any of the .META. regions.
closeMetaRegions();
}
private void closeMetaRegions() {
isRootServer = master.regionManager.isRootServer(deadServerAddress);
if (isRootServer) {
master.regionManager.unsetRootRegion();
}
List<byte[]> metaStarts = master.regionManager.listMetaRegionsForServer(deadServerAddress);
metaRegions = new ArrayList<MetaRegion>();
for (byte [] region : metaStarts) {
MetaRegion r = master.regionManager.offlineMetaRegion(region);
metaRegions.add(r);
}
}
@Override
@ -254,17 +273,23 @@ class ProcessServerShutdown extends RegionServerOperation {
logSplit = true;
}
if (this.rootRegionServer && !this.rootRegionReassigned) {
// avoid multiple root region reassignment
this.rootRegionReassigned = true;
// The server that died was serving the root region. Now that the log
// has been split, get it reassigned.
LOG.info("Log split complete, meta reassignment and scanning:");
if (this.isRootServer) {
LOG.info("ProcessServerShutdown reassigning ROOT region");
master.regionManager.reassignRootRegion();
// When we call rootAvailable below, it will put us on the delayed
// to do queue to allow some time to pass during which the root
// region will hopefully get reassigned.
isRootServer = false; // prevent double reassignment... heh.
}
for (MetaRegion metaRegion : metaRegions) {
LOG.info("ProcessServerShutdown setting to unassigned: " + metaRegion.toString());
master.regionManager.setUnassigned(metaRegion.getRegionInfo(), true);
}
// Once the meta regions are online, "forget" about them. Since there are explicit
// checks below to make sure meta/root are online, this is likely to occur.
metaRegions.clear();
if (!rootAvailable()) {
// Return true so that worker does not put this request back on the
// toDoQueue.
@ -276,8 +301,7 @@ class ProcessServerShutdown extends RegionServerOperation {
// Scan the ROOT region
Boolean result = new ScanRootRegion(
new MetaRegion(master.getRootRegionLocation(),
HRegionInfo.ROOT_REGIONINFO.getRegionName(),
HConstants.EMPTY_START_ROW), this.master).doWithRetries();
HRegionInfo.ROOT_REGIONINFO), this.master).doWithRetries();
if (result == null) {
// Master is closing - give up
return true;
@ -315,4 +339,9 @@ class ProcessServerShutdown extends RegionServerOperation {
}
return true;
}
@Override
protected int getPriority() {
return 2; // high but not highest priority
}
}
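
The shutdown handler now works in two phases: the constructor immediately unsets ROOT and offlines any .META. regions the dead server held so nothing is routed to them, and reassignment waits until process() has split the dead server's log. A compressed sketch of that ordering with illustrative state flags:

public class ShutdownFlowDemo {
  // Illustrative flags standing in for the master-side state ProcessServerShutdown touches.
  static boolean rootOnDeadServer = true;
  static boolean rootOnline = true;

  public static void main(String[] args) {
    // Phase 1 (constructor): take ROOT off the books immediately so nothing routes to it.
    if (rootOnDeadServer) {
      rootOnline = false;
      System.out.println("ROOT unset (not yet reassigned)");
    }
    // Phase 2 (process()): only after the dead server's write-ahead log is split
    // is ROOT put back up for assignment (reassignRootRegion() in the real code).
    splitLog();
    if (!rootOnline) {
      System.out.println("log split complete, reassigning ROOT");
      rootOnline = true;
    }
  }

  static void splitLog() {
    System.out.println("splitting the dead server's HLog");
  }
}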

View File

@ -96,7 +96,7 @@ class RegionManager implements HConstants {
*
* @see RegionState inner-class below
*/
private final SortedMap<String, RegionState> regionsInTransition =
final SortedMap<String, RegionState> regionsInTransition =
Collections.synchronizedSortedMap(new TreeMap<String, RegionState>());
// How many regions to assign a server at a time.
@ -164,6 +164,7 @@ class RegionManager implements HConstants {
rootRegionLocation.set(null);
regionsInTransition.remove(
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString());
LOG.info("-ROOT- region unset (but not set to be reassigned)");
}
}
@ -175,6 +176,7 @@ class RegionManager implements HConstants {
s.setUnassigned();
regionsInTransition.put(
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString(), s);
LOG.info("ROOT inserted into regionsInTransition");
}
}
}
@ -191,9 +193,12 @@ class RegionManager implements HConstants {
void assignRegions(HServerInfo info, HRegionInfo[] mostLoadedRegions,
ArrayList<HMsg> returnMsgs) {
HServerLoad thisServersLoad = info.getLoad();
boolean isSingleServer = master.serverManager.numServers() == 1;
// figure out what regions need to be assigned and aren't currently being
// worked on elsewhere.
Set<RegionState> regionsToAssign = regionsAwaitingAssignment();
Set<RegionState> regionsToAssign = regionsAwaitingAssignment(info.getServerAddress(),
isSingleServer);
if (regionsToAssign.size() == 0) {
// There are no regions waiting to be assigned.
if (!inSafeMode()) {
@ -203,12 +208,12 @@ class RegionManager implements HConstants {
}
} else {
// if there's only one server, just give it all the regions
if (master.serverManager.numServers() == 1) {
if (isSingleServer) {
assignRegionsToOneServer(regionsToAssign, info, returnMsgs);
} else {
// otherwise, give this server a few regions taking into account the
// load of all the other servers.
assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
info, returnMsgs);
}
}
@ -224,11 +229,21 @@ class RegionManager implements HConstants {
private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad,
final Set<RegionState> regionsToAssign, final HServerInfo info,
final ArrayList<HMsg> returnMsgs) {
boolean isMetaAssign = false;
for (RegionState s : regionsToAssign) {
if (s.getRegionInfo().isMetaRegion())
isMetaAssign = true;
}
int nRegionsToAssign = regionsToAssign.size();
int nregions = regionsPerServer(nRegionsToAssign, thisServersLoad);
LOG.debug("multi assing for " + info + ": nregions to assign: "
+ nRegionsToAssign
+" and nregions: " + nregions
+ " metaAssign: " + isMetaAssign);
nRegionsToAssign -= nregions;
if (nRegionsToAssign > 0) {
if (nRegionsToAssign > 0 || isMetaAssign) {
// We still have more regions to assign. See how many we can assign
// before this server becomes more heavily loaded than the next
// most heavily loaded server.
@ -244,6 +259,8 @@ class RegionManager implements HConstants {
// continue;
}
LOG.debug("Doing for " + info + " nregions: " + nregions +
" and nRegionsToAssign: " + nRegionsToAssign);
if (nregions < nRegionsToAssign) {
// There are some more heavily loaded servers
// but we can't assign all the regions to this server.
@ -306,8 +323,32 @@ class RegionManager implements HConstants {
LOG.info("Assigning region " + regionName + " to " + sinfo.getServerName());
rs.setPendingOpen(sinfo.getServerName());
this.regionsInTransition.put(regionName, rs);
this.historian.addRegionAssignment(rs.getRegionInfo(),
sinfo.getServerName());
// Since the meta/root may not be available at this moment, we
// defer the region historian update by queueing it as its own operation.
try {
// TODO move this into an actual class, and use the RetryableMetaOperation
master.toDoQueue.put(
new RegionServerOperation(master) {
protected boolean process() throws IOException {
if (!rootAvailable() || !metaTableAvailable()) {
return true; // the two above us will put us on the delayed queue
}
// this call can cause problems if meta/root is offline!
historian.addRegionAssignment(rs.getRegionInfo(),
sinfo.getServerName());
return true;
}
public String toString() {
return "RegionAssignmentHistorian from " + sinfo.getServerName();
}
}
);
} catch (InterruptedException e) {
// ignore and don't write the region historian
LOG.info("doRegionAssignment: Couldn't queue the region historian due to exception: " + e);
}
returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo()));
}
@ -355,18 +396,40 @@ class RegionManager implements HConstants {
* only caller (assignRegions, whose caller is ServerManager.processMsgs) owns
* the monitor for RegionManager
*/
private Set<RegionState> regionsAwaitingAssignment() {
private Set<RegionState> regionsAwaitingAssignment(HServerAddress addr,
boolean isSingleServer) {
// set of regions we want to assign to this server
Set<RegionState> regionsToAssign = new HashSet<RegionState>();
// Look over the set of regions that aren't currently assigned to
boolean isMetaServer = isMetaServer(addr);
// Handle if root is unassigned... only assign root if root is offline.
RegionState rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString());
if (rootState != null && rootState.isUnassigned()) {
// Don't hand ROOT to a server that is already carrying META; if it is,
// return the empty list. By definition this server cannot already be hosting
// ROOT (ROOT is unassigned), so the META check is the only one needed.
if (!isMetaServer) {
regionsToAssign.add(rootState);
}
return regionsToAssign;
}
// Look over the set of regions that aren't currently assigned to
// determine which we should assign to this server.
boolean reassigningMetas = numberOfMetaRegions.get() != onlineMetaRegions.size();
boolean isMetaOrRoot = isMetaServer || isRootServer(addr);
if (reassigningMetas && isMetaOrRoot && !isSingleServer) {
return regionsToAssign; // dont assign anything to this server.
}
for (RegionState s: regionsInTransition.values()) {
HRegionInfo i = s.getRegionInfo();
if (i == null) {
continue;
}
if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
if (reassigningMetas &&
!i.isMetaRegion()) {
// Can't assign user regions until all meta regions have been assigned
// and are on-line
@ -455,7 +518,7 @@ class RegionManager implements HConstants {
}
LOG.info("Skipped " + skipped + " region(s) that are in transition states");
}
static class TableDirFilter implements PathFilter {
public boolean accept(Path path) {
@ -607,7 +670,7 @@ class RegionManager implements HConstants {
Bytes.toString(HConstants.ROOT_TABLE_NAME));
}
metaRegions.add(new MetaRegion(rootRegionLocation.get(),
HRegionInfo.ROOT_REGIONINFO.getRegionName()));
HRegionInfo.ROOT_REGIONINFO));
} else {
if (!areAllMetaRegionsOnline()) {
throw new NotAllMetaRegionsOnlineException();
@ -685,7 +748,7 @@ class RegionManager implements HConstants {
* @return list of MetaRegion objects
*/
public List<MetaRegion> getListOfOnlineMetaRegions() {
List<MetaRegion> regions = null;
List<MetaRegion> regions;
synchronized(onlineMetaRegions) {
regions = new ArrayList<MetaRegion>(onlineMetaRegions.values());
}
@ -712,11 +775,104 @@ class RegionManager implements HConstants {
/**
* Set an online MetaRegion offline - remove it from the map.
* @param startKey start key of the meta region to take offline
* @return the MetaRegion that was taken offline.
*/
public void offlineMetaRegion(byte [] startKey) {
onlineMetaRegions.remove(startKey);
public MetaRegion offlineMetaRegion(byte [] startKey) {
LOG.info("META region removed from onlineMetaRegions");
return onlineMetaRegions.remove(startKey);
}
public boolean isRootServer(HServerAddress server) {
if (master.getRootRegionLocation() != null
&& server.equals(master.getRootRegionLocation()))
return true;
return false;
}
/**
* Returns the list of byte[] start-keys for any .META. regions hosted
* on the indicated server.
*
* @param server server address
* @return list of meta region start-keys.
*/
public List<byte[]> listMetaRegionsForServer(HServerAddress server) {
List<byte[]> metas = new ArrayList<byte[]>();
for ( MetaRegion region : onlineMetaRegions.values() ) {
if (server.equals(region.getServer())) {
metas.add(region.getStartKey());
}
}
return metas;
}
/**
* Does this server have any META regions open on it, or any meta
* regions being assigned to it?
*
* @param server Server IP:port
* @return true if server has meta region assigned
*/
public boolean isMetaServer(HServerAddress server) {
for ( MetaRegion region : onlineMetaRegions.values() ) {
if (server.equals(region.getServer())) {
return true;
}
}
// This might be expensive, but we need to make sure we dont
// get double assignment to the same regionserver.
for (RegionState s : regionsInTransition.values()) {
if (s.getRegionInfo().isMetaRegion()
&& !s.isUnassigned()
&& s.getServerName() != null
&& s.getServerName().equals(server.toString())) {
// Has an outstanding meta region to be assigned.
return true;
}
}
return false;
}
/**
* Call to take this metaserver offline for immediate reassignment. Used only
* when we know a regionserver has shut down cleanly.
*
* A meta server is a server that hosts either -ROOT- or any .META. regions.
*
* If you are considering a potentially unclean shutdown, use ProcessServerShutdown, which
* calls other methods to immediately unassign root/meta but delay the reassign until the
* log has been split.
*
* @param server the server that went down
* @return true if this was in fact a meta server, false if it did not carry meta regions.
*/
public synchronized boolean offlineMetaServer(HServerAddress server) {
boolean hasMeta = false;
// check to see if ROOT and/or .META. are on this server, reassign them.
// use master.getRootRegionLocation.
if (master.getRootRegionLocation() != null &&
server.equals(master.getRootRegionLocation())) {
LOG.info("Offlined ROOT server: " + server);
reassignRootRegion();
hasMeta = true;
}
// AND
for ( MetaRegion region : onlineMetaRegions.values() ) {
if (server.equals(region.getServer())) {
LOG.info("Offlining META region: " + region);
offlineMetaRegion(region.getStartKey());
// Set for reassignment.
setUnassigned(region.getRegionInfo(), true);
hasMeta = true;
}
}
return hasMeta;
}
/**
* Remove a region from the region state map.
*
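
The net effect of regionsAwaitingAssignment(), isMetaServer() and isRootServer() is an assignment policy: an unassigned ROOT only goes to a server not already carrying META, and while METAs are still being reassigned a ROOT/META host receives nothing else (unless it is the only server). A self-contained sketch of that decision, with booleans standing in for the real RegionManager lookups:

public class AssignPolicyDemo {
  // Decide what this heartbeating server may receive, mirroring the early exits in
  // regionsAwaitingAssignment(). Every flag is an illustrative stand-in for the
  // isRootServer()/isMetaServer() lookups and the onlineMetaRegions count check.
  static String decide(boolean rootUnassigned, boolean hostsMeta, boolean hostsRoot,
      boolean metasStillReassigning, boolean singleServer) {
    if (rootUnassigned) {
      // Hand ROOT only to a server not already burdened with META.
      return hostsMeta ? "nothing" : "ROOT only";
    }
    if (metasStillReassigning && (hostsMeta || hostsRoot) && !singleServer) {
      // Keep ROOT/META hosts lightly loaded until every META is back online.
      return "nothing";
    }
    return "META first, then user regions";
  }

  public static void main(String[] args) {
    System.out.println(decide(true, true, false, true, false));    // nothing
    System.out.println(decide(true, false, false, true, false));   // ROOT only
    System.out.println(decide(false, true, false, true, false));   // nothing
    System.out.println(decide(false, false, false, false, false)); // META first, then user regions
  }
}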

View File

@ -89,6 +89,14 @@ abstract class RegionServerOperation implements Delayed, HConstants {
}
return available;
}
public int compareTo(RegionServerOperation other) {
return getPriority() - other.getPriority();
}
// The priority of this operation; lower values are processed first (0 is the highest priority).
protected int getPriority() {
return Integer.MAX_VALUE;
}
protected abstract boolean process() throws IOException;
}
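
compareTo() plus getPriority() are what make the PriorityBlockingQueue change in HMaster meaningful: lower priority values drain first, so ProcessRegionOpen (0) and ProcessServerShutdown (2) jump ahead of ordinary operations (Integer.MAX_VALUE). A runnable sketch with illustrative operation classes:

import java.util.concurrent.PriorityBlockingQueue;

// Illustrative stand-in for RegionServerOperation: lower values drain first.
abstract class Op implements Comparable<Op> {
  protected int getPriority() { return Integer.MAX_VALUE; }  // default: least urgent
  public int compareTo(Op other) {
    int a = getPriority(), b = other.getPriority();
    return a < b ? -1 : (a > b ? 1 : 0);
  }
}

class OpenMetaOp extends Op {            // like ProcessRegionOpen
  protected int getPriority() { return 0; }
  public String toString() { return "open META/ROOT"; }
}

class ServerShutdownOp extends Op {      // like ProcessServerShutdown
  protected int getPriority() { return 2; }
  public String toString() { return "server shutdown"; }
}

class UserRegionOp extends Op {
  public String toString() { return "ordinary region work"; }
}

public class QueueDemo {
  public static void main(String[] args) throws InterruptedException {
    PriorityBlockingQueue<Op> toDo = new PriorityBlockingQueue<Op>();
    toDo.put(new UserRegionOp());
    toDo.put(new ServerShutdownOp());
    toDo.put(new OpenMetaOp());
    // Drains META/ROOT work first, then the shutdown, then everything else.
    while (!toDo.isEmpty()) {
      System.out.println(toDo.take());
    }
  }
}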

View File

@ -91,6 +91,7 @@ abstract class RetryableMetaOperation<T> implements Callable<T> {
exceptions.add(e);
}
} catch (Exception e) {
LOG.debug("Exception in RetryableMetaOperation: ", e);
throw new RuntimeException(e);
}
sleeper.sleep();

View File

@ -52,7 +52,7 @@ class RootScanner extends BaseScanner {
synchronized(scannerLock) {
if (master.getRootRegionLocation() != null) {
scanRegion(new MetaRegion(master.getRootRegionLocation(),
HRegionInfo.ROOT_REGIONINFO.getRegionName()));
HRegionInfo.ROOT_REGIONINFO));
}
}
} catch (IOException e) {

View File

@ -184,14 +184,9 @@ class ServerManager implements HConstants {
// The startup message was from a known server with the same name.
// Timeout the old one right away.
HServerAddress root = master.getRootRegionLocation();
boolean rootServer = false;
if (root != null && root.equals(storedInfo.getServerAddress())) {
master.regionManager.unsetRootRegion();
rootServer = true;
}
try {
master.toDoQueue.put(
new ProcessServerShutdown(master, storedInfo, rootServer));
new ProcessServerShutdown(master, storedInfo));
} catch (InterruptedException e) {
LOG.error("Insertion into toDoQueue was interrupted", e);
}
@ -321,7 +316,8 @@ class ServerManager implements HConstants {
private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) {
synchronized (serversToServerInfo) {
try {
// HRegionServer is shutting down.
// This method removes ROOT/META from the list and marks them to be reassigned
// in addition to other housework.
if (removeServerInfo(serverInfo.getServerName(),
serverInfo.getServerAddress())) {
// Only process the exit message if the server still has registered info.
@ -335,13 +331,9 @@ class ServerManager implements HConstants {
LOG.info("Processing " + msgs[i] + " from " +
serverInfo.getServerName());
HRegionInfo info = msgs[i].getRegionInfo();
synchronized (master.regionManager) {
if (info.isRootRegion()) {
master.regionManager.reassignRootRegion();
} else {
if (info.isMetaTable()) {
master.regionManager.offlineMetaRegion(info.getStartKey());
}
// Meta/root region offlining is handled in removeServerInfo above.
if (!info.isMetaRegion()) {
synchronized (master.regionManager) {
if (!master.regionManager.isOfflined(
info.getRegionNameAsString())) {
master.regionManager.setUnassigned(info, true);
@ -467,14 +459,18 @@ class ServerManager implements HConstants {
master.regionManager.setPendingClose(i.getRegionNameAsString());
}
// Figure out what the RegionServer ought to do, and write back.
// Should we tell it to close regions because it's overloaded? If it's
// currently opening regions, leave it alone till all are open.
if (openingCount < this.nobalancingCount) {
LOG.debug("Process all wells: " + serverInfo + " openingCount: " + openingCount +
", nobalancingCount: " + nobalancingCount);
if ((openingCount < this.nobalancingCount)) {
this.master.regionManager.assignRegions(serverInfo, mostLoadedRegions,
returnMsgs);
}
// Send any pending table actions.
this.master.regionManager.applyActions(serverInfo, returnMsgs);
}
@ -644,10 +640,8 @@ class ServerManager implements HConstants {
// This method can be called a couple of times during shutdown.
if (info != null) {
LOG.info("Removing server's info " + serverName);
if (master.getRootRegionLocation() != null &&
info.getServerAddress().equals(master.getRootRegionLocation())) {
master.regionManager.unsetRootRegion();
}
master.regionManager.offlineMetaServer(info.getServerAddress());
infoUpdated = true;
// update load information
@ -785,17 +779,7 @@ class ServerManager implements HConstants {
// Remove the server from the known servers list and update load info
serverAddressToServerInfo.remove(serverAddress);
HServerInfo info = serversToServerInfo.remove(server);
boolean rootServer = false;
if (info != null) {
HServerAddress root = master.getRootRegionLocation();
if (root != null && root.equals(info.getServerAddress())) {
// NOTE: If the server was serving the root region, we cannot
// reassign
// it here because the new server will start serving the root region
// before ProcessServerShutdown has a chance to split the log file.
master.regionManager.unsetRootRegion();
rootServer = true;
}
String serverName = HServerInfo.getServerName(info);
HServerLoad load = serversToLoad.remove(serverName);
if (load != null) {
@ -812,8 +796,7 @@ class ServerManager implements HConstants {
}
deadServers.add(server);
try {
master.toDoQueue.put(new ProcessServerShutdown(master, info,
rootServer));
master.toDoQueue.put(new ProcessServerShutdown(master, info));
} catch (InterruptedException e) {
LOG.error("insert into toDoQueue was interrupted", e);
}