HBASE-1457 Taking down ROOT/META regionserver can result in cluster becoming in-operational
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@780436 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3946cd29c0
commit
e49a037aeb
|
@ -156,6 +156,8 @@ Release 0.20.0 - Unreleased
|
|||
HBASE-1395 InfoServers no longer put up a UI
|
||||
HBASE-1302 When a new master comes up, regionservers should continue with
|
||||
their region assignments from the last master
|
||||
HBASE-1457 Taking down ROOT/META regionserver can result in cluster
|
||||
becoming in-operational (Ryan Rawson via Stack)
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
||||
|
|
|
@ -344,7 +344,7 @@ public class HConnectionManager implements HConstants {
|
|||
rowResult.get(COL_REGIONINFO));
|
||||
|
||||
// Only examine the rows where the startKey is zero length
|
||||
if (info.getStartKey().length == 0) {
|
||||
if (info != null && info.getStartKey().length == 0) {
|
||||
uniqueTables.add(info.getTableDesc());
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -65,7 +65,7 @@ import org.apache.hadoop.util.ReflectionUtils;
|
|||
public class HBaseClient {
|
||||
|
||||
public static final Log LOG =
|
||||
LogFactory.getLog("org.apache.hadoop.ipc.HBaseClass");
|
||||
LogFactory.getLog("org.apache.hadoop.ipc.HBaseClient");
|
||||
protected Hashtable<ConnectionId, Connection> connections =
|
||||
new Hashtable<ConnectionId, Connection>();
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ import java.util.concurrent.BlockingQueue;
|
|||
import java.util.concurrent.DelayQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -124,7 +125,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
volatile DelayQueue<RegionServerOperation> delayedToDoQueue =
|
||||
new DelayQueue<RegionServerOperation>();
|
||||
volatile BlockingQueue<RegionServerOperation> toDoQueue =
|
||||
new LinkedBlockingQueue<RegionServerOperation>();
|
||||
new PriorityBlockingQueue<RegionServerOperation>();
|
||||
|
||||
private final HBaseServer server;
|
||||
private final HServerAddress address;
|
||||
|
@ -235,6 +236,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
// The rpc-server port can be ephemeral... ensure we have the correct info
|
||||
this.address = new HServerAddress(server.getListenerAddress());
|
||||
|
||||
// Don't retry too much: cap the number of client retries.
|
||||
conf.setInt("hbase.client.retries.number", 3);
|
||||
|
||||
this.connection = ServerConnectionManager.getConnection(conf);
|
||||
|
||||
this.metaRescanInterval =
|
||||
|
@ -494,15 +498,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
return false;
|
||||
}
|
||||
LOG.warn("Processing pending operations: " + op.toString(), ex);
|
||||
try {
|
||||
// put the operation back on the queue... maybe it'll work next time.
|
||||
toDoQueue.put(op);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(
|
||||
"Putting into toDoQueue was interrupted.", e);
|
||||
} catch (Exception e) {
|
||||
LOG.error("main processing loop: " + op.toString(), e);
|
||||
}
|
||||
delayedToDoQueue.put(op);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -549,7 +545,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
else if(region.isMetaRegion()) {
|
||||
MetaRegion m =
|
||||
new MetaRegion(new HServerAddress(address),
|
||||
region.getRegionName(), region.getStartKey());
|
||||
region);
|
||||
regionManager.addMetaRegionToScan(m);
|
||||
}
|
||||
assignedRegions.put(region.getRegionName(), region);
|
||||
|
|
|
@ -23,42 +23,36 @@ import java.util.Arrays;
|
|||
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
|
||||
/** Describes a meta region and its server */
|
||||
public class MetaRegion implements Comparable<MetaRegion> {
|
||||
private final HServerAddress server;
|
||||
private final byte [] regionName;
|
||||
private final byte [] startKey;
|
||||
private HRegionInfo regionInfo;
|
||||
|
||||
MetaRegion(final HServerAddress server, final byte [] regionName) {
|
||||
this (server, regionName, HConstants.EMPTY_START_ROW);
|
||||
}
|
||||
|
||||
MetaRegion(final HServerAddress server, final byte [] regionName,
|
||||
final byte [] startKey) {
|
||||
MetaRegion(final HServerAddress server, HRegionInfo regionInfo) {
|
||||
if (server == null) {
|
||||
throw new IllegalArgumentException("server cannot be null");
|
||||
}
|
||||
this.server = server;
|
||||
if (regionName == null) {
|
||||
throw new IllegalArgumentException("regionName cannot be null");
|
||||
if (regionInfo == null) {
|
||||
throw new IllegalArgumentException("regionInfo cannot be null");
|
||||
}
|
||||
this.regionName = regionName;
|
||||
this.startKey = startKey;
|
||||
this.regionInfo = regionInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "{regionname: " + Bytes.toString(this.regionName) +
|
||||
", startKey: <" + Bytes.toString(this.startKey) +
|
||||
">, server: " + this.server.toString() + "}";
|
||||
return "{server: " + this.server.toString() + ", regionname: " +
|
||||
regionInfo.getRegionNameAsString() + ", startKey: <" +
|
||||
Bytes.toString(regionInfo.getStartKey()) + ">}";
|
||||
}
|
||||
|
||||
/** @return the regionName */
|
||||
public byte [] getRegionName() {
|
||||
return regionName;
|
||||
return regionInfo.getRegionName();
|
||||
}
|
||||
|
||||
/** @return the server */
|
||||
|
@ -68,7 +62,11 @@ public class MetaRegion implements Comparable<MetaRegion> {
|
|||
|
||||
/** @return the startKey */
|
||||
public byte [] getStartKey() {
|
||||
return startKey;
|
||||
return regionInfo.getStartKey();
|
||||
}
|
||||
|
||||
public HRegionInfo getRegionInfo() {
|
||||
return regionInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -78,22 +76,17 @@ public class MetaRegion implements Comparable<MetaRegion> {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = Arrays.hashCode(this.regionName);
|
||||
result ^= Arrays.hashCode(this.startKey);
|
||||
return result;
|
||||
return regionInfo.hashCode();
|
||||
}
|
||||
|
||||
// Comparable
|
||||
|
||||
public int compareTo(MetaRegion other) {
|
||||
int result = Bytes.compareTo(this.regionName, other.getRegionName());
|
||||
if(result == 0) {
|
||||
result = Bytes.compareTo(this.startKey, other.getStartKey());
|
||||
if (result == 0) {
|
||||
// Might be on different host?
|
||||
result = this.server.compareTo(other.server);
|
||||
}
|
||||
int cmp = regionInfo.compareTo(other.regionInfo);
|
||||
if(cmp == 0) {
|
||||
// Might be on different host?
|
||||
cmp = this.server.compareTo(other.server);
|
||||
}
|
||||
return result;
|
||||
return cmp;
|
||||
}
|
||||
}
|
|
@ -25,6 +25,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
|
|||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
import org.apache.hadoop.hbase.RegionHistorian;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
|
@ -58,67 +59,68 @@ class ProcessRegionOpen extends ProcessRegionStatusChange {
|
|||
|
||||
@Override
|
||||
protected boolean process() throws IOException {
|
||||
Boolean result =
|
||||
new RetryableMetaOperation<Boolean>(getMetaRegion(), this.master) {
|
||||
private final RegionHistorian historian = RegionHistorian.getInstance();
|
||||
|
||||
public Boolean call() throws IOException {
|
||||
LOG.info(regionInfo.getRegionNameAsString() + " open on " +
|
||||
serverInfo.getServerAddress().toString());
|
||||
if (!metaRegionAvailable()) {
|
||||
// We can't proceed unless the meta region we are going to update
|
||||
// is online. metaRegionAvailable() has put this operation on the
|
||||
// delayedToDoQueue, so return true so the operation is not put
|
||||
// back on the toDoQueue
|
||||
return true;
|
||||
}
|
||||
if (!metaRegionAvailable()) {
|
||||
// We can't proceed unless the meta region we are going to update
|
||||
// is online. metaRegionAvailable() has put this operation on the
|
||||
// delayedToDoQueue, so return true so the operation is not put
|
||||
// back on the toDoQueue
|
||||
return true;
|
||||
}
|
||||
|
||||
// Register the newly-available Region's location.
|
||||
LOG.info("updating row " + regionInfo.getRegionNameAsString() +
|
||||
" in region " + Bytes.toString(metaRegionName) + " with " +
|
||||
" with startcode " + serverInfo.getStartCode() + " and server " +
|
||||
serverInfo.getServerAddress());
|
||||
BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
|
||||
b.put(COL_SERVER,
|
||||
Bytes.toBytes(serverInfo.getServerAddress().toString()));
|
||||
b.put(COL_STARTCODE, Bytes.toBytes(serverInfo.getStartCode()));
|
||||
server.batchUpdate(metaRegionName, b, -1L);
|
||||
if (!this.historian.isOnline()) {
|
||||
// This is safest place to do the onlining of the historian in
|
||||
// the master. When we get to here, we know there is a .META.
|
||||
// for the historian to go against.
|
||||
this.historian.online(this.master.getConfiguration());
|
||||
final RegionHistorian historian = RegionHistorian.getInstance();
|
||||
HRegionInterface server =
|
||||
master.connection.getHRegionConnection(getMetaRegion().getServer());
|
||||
LOG.info(regionInfo.getRegionNameAsString() + " open on " +
|
||||
serverInfo.getServerAddress().toString());
|
||||
|
||||
// Register the newly-available Region's location.
|
||||
LOG.info("updating row " + regionInfo.getRegionNameAsString() +
|
||||
" in region " + Bytes.toString(metaRegionName) + " with " +
|
||||
" with startcode " + serverInfo.getStartCode() + " and server " +
|
||||
serverInfo.getServerAddress());
|
||||
BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
|
||||
b.put(COL_SERVER,
|
||||
Bytes.toBytes(serverInfo.getServerAddress().toString()));
|
||||
b.put(COL_STARTCODE, Bytes.toBytes(serverInfo.getStartCode()));
|
||||
server.batchUpdate(metaRegionName, b, -1L);
|
||||
if (!historian.isOnline()) {
|
||||
// This is safest place to do the onlining of the historian in
|
||||
// the master. When we get to here, we know there is a .META.
|
||||
// for the historian to go against.
|
||||
historian.online(this.master.getConfiguration());
|
||||
}
|
||||
historian.addRegionOpen(regionInfo, serverInfo.getServerAddress());
|
||||
synchronized (master.regionManager) {
|
||||
if (isMetaTable) {
|
||||
// It's a meta region.
|
||||
MetaRegion m =
|
||||
new MetaRegion(new HServerAddress(serverInfo.getServerAddress()),
|
||||
regionInfo);
|
||||
if (!master.regionManager.isInitialMetaScanComplete()) {
|
||||
// Put it on the queue to be scanned for the first time.
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Adding " + m.toString() + " to regions to scan");
|
||||
}
|
||||
this.historian.addRegionOpen(regionInfo, serverInfo.getServerAddress());
|
||||
synchronized (master.regionManager) {
|
||||
if (isMetaTable) {
|
||||
// It's a meta region.
|
||||
MetaRegion m =
|
||||
new MetaRegion(new HServerAddress(serverInfo.getServerAddress()),
|
||||
regionInfo.getRegionName(), regionInfo.getStartKey());
|
||||
if (!master.regionManager.isInitialMetaScanComplete()) {
|
||||
// Put it on the queue to be scanned for the first time.
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Adding " + m.toString() + " to regions to scan");
|
||||
}
|
||||
master.regionManager.addMetaRegionToScan(m);
|
||||
} else {
|
||||
// Add it to the online meta regions
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
|
||||
}
|
||||
master.regionManager.putMetaRegionOnline(m);
|
||||
// Interrupting the Meta Scanner sleep so that it can
|
||||
// process regions right away
|
||||
master.regionManager.metaScannerThread.interrupt();
|
||||
}
|
||||
}
|
||||
// If updated successfully, remove from pending list.
|
||||
master.regionManager.removeRegion(regionInfo);
|
||||
return true;
|
||||
master.regionManager.addMetaRegionToScan(m);
|
||||
} else {
|
||||
// Add it to the online meta regions
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
|
||||
}
|
||||
master.regionManager.putMetaRegionOnline(m);
|
||||
// Interrupting the Meta Scanner sleep so that it can
|
||||
// process regions right away
|
||||
master.regionManager.metaScannerThread.interrupt();
|
||||
}
|
||||
}.doWithRetries();
|
||||
return result == null ? true : result;
|
||||
}
|
||||
// If updated successfully, remove from pending list.
|
||||
master.regionManager.removeRegion(regionInfo);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getPriority() {
|
||||
return 0; // highest priority
|
||||
}
|
||||
}
|
|
@ -31,7 +31,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation {
|
|||
protected final HRegionInfo regionInfo;
|
||||
private volatile MetaRegion metaRegion = null;
|
||||
protected volatile byte[] metaRegionName = null;
|
||||
|
||||
|
||||
/**
|
||||
* @param master
|
||||
* @param regionInfo
|
||||
|
@ -47,6 +47,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation {
|
|||
if (isMetaTable) {
|
||||
// This operation is for the meta table
|
||||
if (!rootAvailable()) {
|
||||
requeue();
|
||||
// But we can't proceed unless the root region is available
|
||||
available = false;
|
||||
}
|
||||
|
@ -67,7 +68,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation {
|
|||
if (isMetaTable) {
|
||||
this.metaRegionName = HRegionInfo.ROOT_REGIONINFO.getRegionName();
|
||||
this.metaRegion = new MetaRegion(master.getRootRegionLocation(),
|
||||
this.metaRegionName, HConstants.EMPTY_START_ROW);
|
||||
HRegionInfo.ROOT_REGIONINFO);
|
||||
} else {
|
||||
this.metaRegion =
|
||||
master.regionManager.getFirstMetaRegionForRegion(regionInfo);
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.HConstants;
|
|||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
import org.apache.hadoop.hbase.RemoteExceptionHandler;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.regionserver.HLog;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
|
@ -44,12 +45,14 @@ import org.apache.hadoop.hbase.io.RowResult;
|
|||
*/
|
||||
class ProcessServerShutdown extends RegionServerOperation {
|
||||
private final String deadServer;
|
||||
private final boolean rootRegionServer;
|
||||
private boolean rootRegionReassigned = false;
|
||||
private boolean isRootServer;
|
||||
private List<MetaRegion> metaRegions;
|
||||
|
||||
private Path oldLogDir;
|
||||
private boolean logSplit;
|
||||
private boolean rootRescanned;
|
||||
|
||||
private HServerAddress deadServerAddress;
|
||||
|
||||
|
||||
private static class ToDoEntry {
|
||||
boolean regionOffline;
|
||||
|
@ -66,17 +69,33 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
/**
|
||||
* @param master
|
||||
* @param serverInfo
|
||||
* @param rootRegionServer
|
||||
*/
|
||||
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo,
|
||||
boolean rootRegionServer) {
|
||||
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
|
||||
super(master);
|
||||
this.deadServer = HServerInfo.getServerName(serverInfo);
|
||||
this.rootRegionServer = rootRegionServer;
|
||||
this.deadServerAddress = serverInfo.getServerAddress();
|
||||
this.logSplit = false;
|
||||
this.rootRescanned = false;
|
||||
this.oldLogDir =
|
||||
new Path(master.rootdir, HLog.getHLogDirectoryName(serverInfo));
|
||||
|
||||
// check to see if I am responsible for either ROOT or any of the META tables.
|
||||
|
||||
closeMetaRegions();
|
||||
}
|
||||
|
||||
private void closeMetaRegions() {
|
||||
isRootServer = master.regionManager.isRootServer(deadServerAddress);
|
||||
if (isRootServer) {
|
||||
master.regionManager.unsetRootRegion();
|
||||
}
|
||||
List<byte[]> metaStarts = master.regionManager.listMetaRegionsForServer(deadServerAddress);
|
||||
|
||||
metaRegions = new ArrayList<MetaRegion>();
|
||||
for (byte [] region : metaStarts) {
|
||||
MetaRegion r = master.regionManager.offlineMetaRegion(region);
|
||||
metaRegions.add(r);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -254,17 +273,23 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
logSplit = true;
|
||||
}
|
||||
|
||||
if (this.rootRegionServer && !this.rootRegionReassigned) {
|
||||
// avoid multiple root region reassignment
|
||||
this.rootRegionReassigned = true;
|
||||
// The server that died was serving the root region. Now that the log
|
||||
// has been split, get it reassigned.
|
||||
LOG.info("Log split complete, meta reassignment and scanning:");
|
||||
|
||||
if (this.isRootServer) {
|
||||
LOG.info("ProcessServerShutdown reassigning ROOT region");
|
||||
master.regionManager.reassignRootRegion();
|
||||
// When we call rootAvailable below, it will put us on the delayed
|
||||
// to do queue to allow some time to pass during which the root
|
||||
// region will hopefully get reassigned.
|
||||
|
||||
isRootServer = false; // prevent double reassignment... heh.
|
||||
}
|
||||
|
||||
for (MetaRegion metaRegion : metaRegions) {
|
||||
LOG.info("ProcessServerShutdown setting to unassigned: " + metaRegion.toString());
|
||||
master.regionManager.setUnassigned(metaRegion.getRegionInfo(), true);
|
||||
}
|
||||
// once the meta regions are online, "forget" about them. Since there are explicit
|
||||
// checks below to make sure meta/root are online, this is likely to occur.
|
||||
metaRegions.clear();
|
||||
|
||||
if (!rootAvailable()) {
|
||||
// Return true so that worker does not put this request back on the
|
||||
// toDoQueue.
|
||||
|
@ -276,8 +301,7 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
// Scan the ROOT region
|
||||
Boolean result = new ScanRootRegion(
|
||||
new MetaRegion(master.getRootRegionLocation(),
|
||||
HRegionInfo.ROOT_REGIONINFO.getRegionName(),
|
||||
HConstants.EMPTY_START_ROW), this.master).doWithRetries();
|
||||
HRegionInfo.ROOT_REGIONINFO), this.master).doWithRetries();
|
||||
if (result == null) {
|
||||
// Master is closing - give up
|
||||
return true;
|
||||
|
@ -315,4 +339,9 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getPriority() {
|
||||
return 2; // high but not highest priority
|
||||
}
|
||||
}
|
|
@ -96,7 +96,7 @@ class RegionManager implements HConstants {
|
|||
*
|
||||
* @see RegionState inner-class below
|
||||
*/
|
||||
private final SortedMap<String, RegionState> regionsInTransition =
|
||||
final SortedMap<String, RegionState> regionsInTransition =
|
||||
Collections.synchronizedSortedMap(new TreeMap<String, RegionState>());
|
||||
|
||||
// How many regions to assign a server at a time.
|
||||
|
@ -164,6 +164,7 @@ class RegionManager implements HConstants {
|
|||
rootRegionLocation.set(null);
|
||||
regionsInTransition.remove(
|
||||
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString());
|
||||
LOG.info("-ROOT- region unset (but not set to be reassigned)");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -175,6 +176,7 @@ class RegionManager implements HConstants {
|
|||
s.setUnassigned();
|
||||
regionsInTransition.put(
|
||||
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString(), s);
|
||||
LOG.info("ROOT inserted into regionsInTransition");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -191,9 +193,12 @@ class RegionManager implements HConstants {
|
|||
void assignRegions(HServerInfo info, HRegionInfo[] mostLoadedRegions,
|
||||
ArrayList<HMsg> returnMsgs) {
|
||||
HServerLoad thisServersLoad = info.getLoad();
|
||||
boolean isSingleServer = master.serverManager.numServers() == 1;
|
||||
|
||||
// figure out what regions need to be assigned and aren't currently being
|
||||
// worked on elsewhere.
|
||||
Set<RegionState> regionsToAssign = regionsAwaitingAssignment();
|
||||
Set<RegionState> regionsToAssign = regionsAwaitingAssignment(info.getServerAddress(),
|
||||
isSingleServer);
|
||||
if (regionsToAssign.size() == 0) {
|
||||
// There are no regions waiting to be assigned.
|
||||
if (!inSafeMode()) {
|
||||
|
@ -203,12 +208,12 @@ class RegionManager implements HConstants {
|
|||
}
|
||||
} else {
|
||||
// if there's only one server, just give it all the regions
|
||||
if (master.serverManager.numServers() == 1) {
|
||||
if (isSingleServer) {
|
||||
assignRegionsToOneServer(regionsToAssign, info, returnMsgs);
|
||||
} else {
|
||||
// otherwise, give this server a few regions taking into account the
|
||||
// load of all the other servers.
|
||||
assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
|
||||
assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
|
||||
info, returnMsgs);
|
||||
}
|
||||
}
|
||||
|
@ -224,11 +229,21 @@ class RegionManager implements HConstants {
|
|||
private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad,
|
||||
final Set<RegionState> regionsToAssign, final HServerInfo info,
|
||||
final ArrayList<HMsg> returnMsgs) {
|
||||
|
||||
|
||||
boolean isMetaAssign = false;
|
||||
for (RegionState s : regionsToAssign) {
|
||||
if (s.getRegionInfo().isMetaRegion())
|
||||
isMetaAssign = true;
|
||||
}
|
||||
|
||||
int nRegionsToAssign = regionsToAssign.size();
|
||||
int nregions = regionsPerServer(nRegionsToAssign, thisServersLoad);
|
||||
LOG.debug("multi assing for " + info + ": nregions to assign: "
|
||||
+ nRegionsToAssign
|
||||
+" and nregions: " + nregions
|
||||
+ " metaAssign: " + isMetaAssign);
|
||||
nRegionsToAssign -= nregions;
|
||||
if (nRegionsToAssign > 0) {
|
||||
if (nRegionsToAssign > 0 || isMetaAssign) {
|
||||
// We still have more regions to assign. See how many we can assign
|
||||
// before this server becomes more heavily loaded than the next
|
||||
// most heavily loaded server.
|
||||
|
@ -244,6 +259,8 @@ class RegionManager implements HConstants {
|
|||
// continue;
|
||||
}
|
||||
|
||||
LOG.debug("Doing for " + info + " nregions: " + nregions +
|
||||
" and nRegionsToAssign: " + nRegionsToAssign);
|
||||
if (nregions < nRegionsToAssign) {
|
||||
// There are some more heavily loaded servers
|
||||
// but we can't assign all the regions to this server.
|
||||
|
@ -306,8 +323,32 @@ class RegionManager implements HConstants {
|
|||
LOG.info("Assigning region " + regionName + " to " + sinfo.getServerName());
|
||||
rs.setPendingOpen(sinfo.getServerName());
|
||||
this.regionsInTransition.put(regionName, rs);
|
||||
this.historian.addRegionAssignment(rs.getRegionInfo(),
|
||||
sinfo.getServerName());
|
||||
|
||||
// Since the meta/root may not be available at this moment, we queue the
// region historian update as an operation instead of writing it inline.
|
||||
try {
|
||||
// TODO move this into an actual class, and use the RetryableMetaOperation
|
||||
master.toDoQueue.put(
|
||||
new RegionServerOperation(master) {
|
||||
protected boolean process() throws IOException {
|
||||
if (!rootAvailable() || !metaTableAvailable()) {
|
||||
return true; // the two above us will put us on the delayed queue
|
||||
}
|
||||
|
||||
// this call can cause problems if meta/root is offline!
|
||||
historian.addRegionAssignment(rs.getRegionInfo(),
|
||||
sinfo.getServerName());
|
||||
return true;
|
||||
}
|
||||
public String toString() {
|
||||
return "RegionAssignmentHistorian from " + sinfo.getServerName();
|
||||
}
|
||||
}
|
||||
);
|
||||
} catch (InterruptedException e) {
|
||||
// ignore and don't write the region historian
|
||||
LOG.info("doRegionAssignment: Couldn't queue the region historian due to exception: " + e);
|
||||
}
|
||||
|
||||
returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo()));
|
||||
}
|
||||
|
||||
|
@ -355,18 +396,40 @@ class RegionManager implements HConstants {
|
|||
* only caller (assignRegions, whose caller is ServerManager.processMsgs) owns
|
||||
* the monitor for RegionManager
|
||||
*/
|
||||
private Set<RegionState> regionsAwaitingAssignment() {
|
||||
private Set<RegionState> regionsAwaitingAssignment(HServerAddress addr,
|
||||
boolean isSingleServer) {
|
||||
// set of regions we want to assign to this server
|
||||
Set<RegionState> regionsToAssign = new HashSet<RegionState>();
|
||||
|
||||
// Look over the set of regions that aren't currently assigned to
|
||||
|
||||
boolean isMetaServer = isMetaServer(addr);
|
||||
|
||||
// Handle if root is unassigned... only assign root if root is offline.
|
||||
RegionState rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString());
|
||||
if (rootState != null && rootState.isUnassigned()) {
|
||||
// make sure root isnt assigned here first.
|
||||
// if so return 'empty list'
|
||||
// by definition there is no way this could be a ROOT region (since it's
|
||||
// unassigned) so just make sure it isn't hosting META regions.
|
||||
if (!isMetaServer) {
|
||||
regionsToAssign.add(rootState);
|
||||
}
|
||||
return regionsToAssign;
|
||||
}
|
||||
|
||||
// Look over the set of regions that aren't currently assigned to
|
||||
// determine which we should assign to this server.
|
||||
boolean reassigningMetas = numberOfMetaRegions.get() != onlineMetaRegions.size();
|
||||
boolean isMetaOrRoot = isMetaServer || isRootServer(addr);
|
||||
if (reassigningMetas && isMetaOrRoot && !isSingleServer) {
|
||||
return regionsToAssign; // dont assign anything to this server.
|
||||
}
|
||||
|
||||
for (RegionState s: regionsInTransition.values()) {
|
||||
HRegionInfo i = s.getRegionInfo();
|
||||
if (i == null) {
|
||||
continue;
|
||||
}
|
||||
if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
|
||||
if (reassigningMetas &&
|
||||
!i.isMetaRegion()) {
|
||||
// Can't assign user regions until all meta regions have been assigned
|
||||
// and are on-line
|
||||
|
@ -455,7 +518,7 @@ class RegionManager implements HConstants {
|
|||
}
|
||||
LOG.info("Skipped " + skipped + " region(s) that are in transition states");
|
||||
}
|
||||
|
||||
|
||||
static class TableDirFilter implements PathFilter {
|
||||
|
||||
public boolean accept(Path path) {
|
||||
|
@ -607,7 +670,7 @@ class RegionManager implements HConstants {
|
|||
Bytes.toString(HConstants.ROOT_TABLE_NAME));
|
||||
}
|
||||
metaRegions.add(new MetaRegion(rootRegionLocation.get(),
|
||||
HRegionInfo.ROOT_REGIONINFO.getRegionName()));
|
||||
HRegionInfo.ROOT_REGIONINFO));
|
||||
} else {
|
||||
if (!areAllMetaRegionsOnline()) {
|
||||
throw new NotAllMetaRegionsOnlineException();
|
||||
|
@ -685,7 +748,7 @@ class RegionManager implements HConstants {
|
|||
* @return list of MetaRegion objects
|
||||
*/
|
||||
public List<MetaRegion> getListOfOnlineMetaRegions() {
|
||||
List<MetaRegion> regions = null;
|
||||
List<MetaRegion> regions;
|
||||
synchronized(onlineMetaRegions) {
|
||||
regions = new ArrayList<MetaRegion>(onlineMetaRegions.values());
|
||||
}
|
||||
|
@ -712,11 +775,104 @@ class RegionManager implements HConstants {
|
|||
/**
|
||||
* Set an online MetaRegion offline - remove it from the map.
|
||||
* @param startKey start key of the meta region to remove
|
||||
* @return the MetaRegion that was taken offline.
|
||||
*/
|
||||
public void offlineMetaRegion(byte [] startKey) {
|
||||
onlineMetaRegions.remove(startKey);
|
||||
public MetaRegion offlineMetaRegion(byte [] startKey) {
|
||||
LOG.info("META region removed from onlineMetaRegions");
|
||||
return onlineMetaRegions.remove(startKey);
|
||||
}
|
||||
|
||||
|
||||
public boolean isRootServer(HServerAddress server) {
|
||||
if (master.getRootRegionLocation() != null
|
||||
&& server.equals(master.getRootRegionLocation()))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of byte[] start-keys for any .META. regions hosted
|
||||
* on the indicated server.
|
||||
*
|
||||
* @param server server address
|
||||
* @return list of meta region start-keys.
|
||||
*/
|
||||
public List<byte[]> listMetaRegionsForServer(HServerAddress server) {
|
||||
List<byte[]> metas = new ArrayList<byte[]>();
|
||||
|
||||
for ( MetaRegion region : onlineMetaRegions.values() ) {
|
||||
if (server.equals(region.getServer())) {
|
||||
metas.add(region.getStartKey());
|
||||
}
|
||||
}
|
||||
|
||||
return metas;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this server have any META regions open on it, or any meta
|
||||
* regions being assigned to it?
|
||||
*
|
||||
* @param server Server IP:port
|
||||
* @return true if server has meta region assigned
|
||||
*/
|
||||
public boolean isMetaServer(HServerAddress server) {
|
||||
for ( MetaRegion region : onlineMetaRegions.values() ) {
|
||||
if (server.equals(region.getServer())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// This might be expensive, but we need to make sure we dont
|
||||
// get double assignment to the same regionserver.
|
||||
for (RegionState s : regionsInTransition.values()) {
|
||||
if (s.getRegionInfo().isMetaRegion()
|
||||
&& !s.isUnassigned()
|
||||
&& s.getServerName() != null
|
||||
&& s.getServerName().equals(server.toString())) {
|
||||
// Has an outstanding meta region to be assigned.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Call to take this metaserver offline for immediate reassignment. Used only
|
||||
* when we know a region has shut down cleanly.
|
||||
*
|
||||
* A meta server is a server that hosts either -ROOT- or any .META. regions.
|
||||
*
|
||||
* If you are considering a unclean shutdown potentially, use ProcessServerShutdown which
|
||||
* calls other methods to immediately unassign root/meta but delay the reassign until the
|
||||
* log has been split.
|
||||
*
|
||||
* @param server the server that went down
|
||||
* @return true if this was in fact a meta server, false if it did not carry meta regions.
|
||||
*/
|
||||
public synchronized boolean offlineMetaServer(HServerAddress server) {
|
||||
boolean hasMeta = false;
|
||||
|
||||
// check to see if ROOT and/or .META. are on this server, reassign them.
|
||||
// use master.getRootRegionLocation.
|
||||
if (master.getRootRegionLocation() != null &&
|
||||
server.equals(master.getRootRegionLocation())) {
|
||||
LOG.info("Offlined ROOT server: " + server);
|
||||
reassignRootRegion();
|
||||
hasMeta = true;
|
||||
}
|
||||
// AND
|
||||
for ( MetaRegion region : onlineMetaRegions.values() ) {
|
||||
if (server.equals(region.getServer())) {
|
||||
LOG.info("Offlining META region: " + region);
|
||||
offlineMetaRegion(region.getStartKey());
|
||||
// Set for reassignment.
|
||||
setUnassigned(region.getRegionInfo(), true);
|
||||
hasMeta = true;
|
||||
}
|
||||
}
|
||||
return hasMeta;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a region from the region state map.
|
||||
*
|
||||
|
|
|
@ -89,6 +89,14 @@ abstract class RegionServerOperation implements Delayed, HConstants {
|
|||
}
|
||||
return available;
|
||||
}
|
||||
|
||||
|
||||
public int compareTo(RegionServerOperation other) {
|
||||
return getPriority() - other.getPriority();
|
||||
}
|
||||
|
||||
// The priority of this operation. Smaller values sort first in compareTo, so 0 is the highest priority.
|
||||
protected int getPriority() {
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
protected abstract boolean process() throws IOException;
|
||||
}
|
|
@ -91,6 +91,7 @@ abstract class RetryableMetaOperation<T> implements Callable<T> {
|
|||
exceptions.add(e);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.debug("Exception in RetryableMetaOperation: ", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
sleeper.sleep();
|
||||
|
|
|
@ -52,7 +52,7 @@ class RootScanner extends BaseScanner {
|
|||
synchronized(scannerLock) {
|
||||
if (master.getRootRegionLocation() != null) {
|
||||
scanRegion(new MetaRegion(master.getRootRegionLocation(),
|
||||
HRegionInfo.ROOT_REGIONINFO.getRegionName()));
|
||||
HRegionInfo.ROOT_REGIONINFO));
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -184,14 +184,9 @@ class ServerManager implements HConstants {
|
|||
// The startup message was from a known server with the same name.
|
||||
// Timeout the old one right away.
|
||||
HServerAddress root = master.getRootRegionLocation();
|
||||
boolean rootServer = false;
|
||||
if (root != null && root.equals(storedInfo.getServerAddress())) {
|
||||
master.regionManager.unsetRootRegion();
|
||||
rootServer = true;
|
||||
}
|
||||
try {
|
||||
master.toDoQueue.put(
|
||||
new ProcessServerShutdown(master, storedInfo, rootServer));
|
||||
new ProcessServerShutdown(master, storedInfo));
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Insertion into toDoQueue was interrupted", e);
|
||||
}
|
||||
|
@ -321,7 +316,8 @@ class ServerManager implements HConstants {
|
|||
private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) {
|
||||
synchronized (serversToServerInfo) {
|
||||
try {
|
||||
// HRegionServer is shutting down.
|
||||
// This method removes ROOT/META from the list and marks them to be reassigned
|
||||
// in addition to other housework.
|
||||
if (removeServerInfo(serverInfo.getServerName(),
|
||||
serverInfo.getServerAddress())) {
|
||||
// Only process the exit message if the server still has registered info.
|
||||
|
@ -335,13 +331,9 @@ class ServerManager implements HConstants {
|
|||
LOG.info("Processing " + msgs[i] + " from " +
|
||||
serverInfo.getServerName());
|
||||
HRegionInfo info = msgs[i].getRegionInfo();
|
||||
synchronized (master.regionManager) {
|
||||
if (info.isRootRegion()) {
|
||||
master.regionManager.reassignRootRegion();
|
||||
} else {
|
||||
if (info.isMetaTable()) {
|
||||
master.regionManager.offlineMetaRegion(info.getStartKey());
|
||||
}
|
||||
// Meta/root region offlining is handled in removeServerInfo above.
|
||||
if (!info.isMetaRegion()) {
|
||||
synchronized (master.regionManager) {
|
||||
if (!master.regionManager.isOfflined(
|
||||
info.getRegionNameAsString())) {
|
||||
master.regionManager.setUnassigned(info, true);
|
||||
|
@ -467,14 +459,18 @@ class ServerManager implements HConstants {
|
|||
master.regionManager.setPendingClose(i.getRegionNameAsString());
|
||||
}
|
||||
|
||||
|
||||
// Figure out what the RegionServer ought to do, and write back.
|
||||
|
||||
// Should we tell it to close regions because it's overloaded? If it's
|
||||
// currently opening regions, leave it alone till all are open.
|
||||
if (openingCount < this.nobalancingCount) {
|
||||
LOG.debug("Process all wells: " + serverInfo + " openingCount: " + openingCount +
|
||||
", nobalancingCount: " + nobalancingCount);
|
||||
if ((openingCount < this.nobalancingCount)) {
|
||||
this.master.regionManager.assignRegions(serverInfo, mostLoadedRegions,
|
||||
returnMsgs);
|
||||
}
|
||||
|
||||
// Send any pending table actions.
|
||||
this.master.regionManager.applyActions(serverInfo, returnMsgs);
|
||||
}
|
||||
|
@ -644,10 +640,8 @@ class ServerManager implements HConstants {
|
|||
// This method can be called a couple of times during shutdown.
|
||||
if (info != null) {
|
||||
LOG.info("Removing server's info " + serverName);
|
||||
if (master.getRootRegionLocation() != null &&
|
||||
info.getServerAddress().equals(master.getRootRegionLocation())) {
|
||||
master.regionManager.unsetRootRegion();
|
||||
}
|
||||
master.regionManager.offlineMetaServer(info.getServerAddress());
|
||||
|
||||
infoUpdated = true;
|
||||
|
||||
// update load information
|
||||
|
@ -785,17 +779,7 @@ class ServerManager implements HConstants {
|
|||
// Remove the server from the known servers list and update load info
|
||||
serverAddressToServerInfo.remove(serverAddress);
|
||||
HServerInfo info = serversToServerInfo.remove(server);
|
||||
boolean rootServer = false;
|
||||
if (info != null) {
|
||||
HServerAddress root = master.getRootRegionLocation();
|
||||
if (root != null && root.equals(info.getServerAddress())) {
|
||||
// NOTE: If the server was serving the root region, we cannot
|
||||
// reassign
|
||||
// it here because the new server will start serving the root region
|
||||
// before ProcessServerShutdown has a chance to split the log file.
|
||||
master.regionManager.unsetRootRegion();
|
||||
rootServer = true;
|
||||
}
|
||||
String serverName = HServerInfo.getServerName(info);
|
||||
HServerLoad load = serversToLoad.remove(serverName);
|
||||
if (load != null) {
|
||||
|
@ -812,8 +796,7 @@ class ServerManager implements HConstants {
|
|||
}
|
||||
deadServers.add(server);
|
||||
try {
|
||||
master.toDoQueue.put(new ProcessServerShutdown(master, info,
|
||||
rootServer));
|
||||
master.toDoQueue.put(new ProcessServerShutdown(master, info));
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("insert into toDoQueue was interrupted", e);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue