HBASE-14536 Balancer & SSH interfering with each other leading to unavailability
This commit is contained in:
parent
e8c69a5921
commit
9bdb88a572
@ -259,6 +259,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
|
||||
private RegionStateListener regionStateListener;
|
||||
|
||||
/**
 * Three-state answer to "is this server hosting this region?", returned by
 * isCarryingMeta, isCarryingMetaReplica and isCarryingRegion.
 * <ul>
 * <li>NOT_HOSTING_REGION - a location was found for the region and it is not the
 * server being checked.</li>
 * <li>HOSTING_REGION - a location was found for the region and it equals the
 * server being checked.</li>
 * <li>UNKNOWN - none of the sources consulted produced a location, so it cannot be
 * determined whether the server is carrying the region.</li>
 * </ul>
 */
public enum ServerHostRegion {
|
||||
NOT_HOSTING_REGION, HOSTING_REGION, UNKNOWN,
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new assignment manager.
|
||||
*
|
||||
@ -3371,16 +3375,16 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
|
||||
}
|
||||
|
||||
public boolean isCarryingMeta(ServerName serverName) {
|
||||
public ServerHostRegion isCarryingMeta(ServerName serverName) {
|
||||
return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
|
||||
}
|
||||
|
||||
public boolean isCarryingMetaReplica(ServerName serverName, int replicaId) {
|
||||
public ServerHostRegion isCarryingMetaReplica(ServerName serverName, int replicaId) {
|
||||
return isCarryingRegion(serverName,
|
||||
RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId));
|
||||
}
|
||||
|
||||
public boolean isCarryingMetaReplica(ServerName serverName, HRegionInfo metaHri) {
|
||||
public ServerHostRegion isCarryingMetaReplica(ServerName serverName, HRegionInfo metaHri) {
|
||||
return isCarryingRegion(serverName, metaHri);
|
||||
}
|
||||
|
||||
@ -3394,7 +3398,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
* processing hasn't finished yet when server shutdown occurs.
|
||||
* @return whether the serverName currently hosts the region
|
||||
*/
|
||||
private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
|
||||
private ServerHostRegion isCarryingRegion(ServerName serverName, HRegionInfo hri) {
|
||||
RegionTransition rt = null;
|
||||
try {
|
||||
byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
|
||||
@ -3412,17 +3416,37 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
boolean matchZK = addressFromZK.equals(serverName);
|
||||
LOG.debug("Checking region=" + hri.getRegionNameAsString() + ", zk server=" + addressFromZK +
|
||||
" current=" + serverName + ", matches=" + matchZK);
|
||||
return matchZK;
|
||||
return matchZK ? ServerHostRegion.HOSTING_REGION : ServerHostRegion.NOT_HOSTING_REGION;
|
||||
}
|
||||
|
||||
ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
|
||||
boolean matchAM = (addressFromAM != null &&
|
||||
addressFromAM.equals(serverName));
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
|
||||
" is on server=" + (addressFromAM != null ? addressFromAM : "null") +
|
||||
" server being checked: " + serverName);
|
||||
}
|
||||
if (addressFromAM != null) {
|
||||
return addressFromAM.equals(serverName) ?
|
||||
ServerHostRegion.HOSTING_REGION : ServerHostRegion.NOT_HOSTING_REGION;
|
||||
}
|
||||
|
||||
return matchAM;
|
||||
if (hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri)) {
|
||||
// For the Meta region (default replica), we can do one more check on MetaTableLocator
|
||||
final ServerName serverNameInZK =
|
||||
server.getMetaTableLocator().getMetaRegionLocation(this.server.getZooKeeper());
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Based on MetaTableLocator, the META region is on server=" +
|
||||
(serverNameInZK == null ? "null" : serverNameInZK) +
|
||||
" server being checked: " + serverName);
|
||||
}
|
||||
if (serverNameInZK != null) {
|
||||
return serverNameInZK.equals(serverName) ?
|
||||
ServerHostRegion.HOSTING_REGION : ServerHostRegion.NOT_HOSTING_REGION;
|
||||
}
|
||||
}
|
||||
|
||||
// Every source has been checked; if we reach here, we cannot tell whether the server is carrying the region.
|
||||
return ServerHostRegion.UNKNOWN;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -617,7 +617,8 @@ public class ServerManager {
|
||||
return;
|
||||
}
|
||||
|
||||
boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName);
|
||||
boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName) ==
|
||||
AssignmentManager.ServerHostRegion.HOSTING_REGION;
|
||||
this.services.getMasterProcedureExecutor().
|
||||
submitProcedure(new ServerCrashProcedure(serverName, true, carryingMeta));
|
||||
LOG.debug("Added=" + serverName +
|
||||
|
@ -313,8 +313,9 @@ implements ServerProcedureInterface {
|
||||
private boolean processMeta(final MasterProcedureEnv env)
|
||||
throws IOException {
|
||||
if (LOG.isDebugEnabled()) LOG.debug("Processing hbase:meta that was on " + this.serverName);
|
||||
MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
|
||||
AssignmentManager am = env.getMasterServices().getAssignmentManager();
|
||||
MasterServices services = env.getMasterServices();
|
||||
MasterFileSystem mfs = services.getMasterFileSystem();
|
||||
AssignmentManager am = services.getAssignmentManager();
|
||||
HRegionInfo metaHRI = HRegionInfo.FIRST_META_REGIONINFO;
|
||||
if (this.shouldSplitWal) {
|
||||
if (this.distributedLogReplay) {
|
||||
@ -328,9 +329,31 @@ implements ServerProcedureInterface {
|
||||
|
||||
// Assign meta if still carrying it. Check again: region may be assigned because of RIT timeout
|
||||
boolean processed = true;
|
||||
if (am.isCarryingMeta(serverName)) {
|
||||
// TODO: May block here if hard time figuring state of meta.
|
||||
boolean shouldAssignMeta = false;
|
||||
AssignmentManager.ServerHostRegion rsCarryingMetaRegion = am.isCarryingMeta(serverName);
|
||||
switch (rsCarryingMetaRegion) {
|
||||
case HOSTING_REGION:
|
||||
LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
|
||||
am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
|
||||
shouldAssignMeta = true;
|
||||
break;
|
||||
case UNKNOWN:
|
||||
if (!services.getMetaTableLocator().isLocationAvailable(services.getZooKeeper())) {
|
||||
// The meta location as seen by the master is null. This can happen when the meta
|
||||
// assignment in a previous run failed while the meta znode had already been updated to null.
|
||||
// We should try to assign the meta again.
|
||||
shouldAssignMeta = true;
|
||||
break;
|
||||
}
|
||||
// fall through
|
||||
case NOT_HOSTING_REGION:
|
||||
LOG.info("META has been assigned to otherwhere, skip assigning.");
|
||||
break;
|
||||
default:
|
||||
throw new IOException("Unsupported action in MetaServerShutdownHandler");
|
||||
}
|
||||
if (shouldAssignMeta) {
|
||||
// TODO: May block here if hard time figuring state of meta.
|
||||
verifyAndAssignMetaWithRetries(env);
|
||||
if (this.shouldSplitWal && distributedLogReplay) {
|
||||
int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
|
||||
@ -409,7 +432,8 @@ implements ServerProcedureInterface {
|
||||
for (int i = 1; i < replicaCount; i++) {
|
||||
HRegionInfo metaHri =
|
||||
RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
|
||||
if (am.isCarryingMetaReplica(this.serverName, metaHri)) {
|
||||
if (am.isCarryingMetaReplica(this.serverName, metaHri) ==
|
||||
AssignmentManager.ServerHostRegion.HOSTING_REGION) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Reassigning meta replica" + metaHri + " that was on " + this.serverName);
|
||||
}
|
||||
|
@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.master.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
|
||||
@ -103,7 +104,8 @@ public class TestServerCrashProcedure {
|
||||
master.setServerCrashProcessingEnabled(false);
|
||||
// Kill a server. Master will notice but do nothing other than add it to list of dead servers.
|
||||
HRegionServer hrs = this.util.getHBaseCluster().getRegionServer(0);
|
||||
boolean carryingMeta = master.getAssignmentManager().isCarryingMeta(hrs.getServerName());
|
||||
boolean carryingMeta = (master.getAssignmentManager().isCarryingMeta(hrs.getServerName()) ==
|
||||
AssignmentManager.ServerHostRegion.HOSTING_REGION);
|
||||
this.util.getHBaseCluster().killRegionServer(hrs.getServerName());
|
||||
hrs.join();
|
||||
// Wait until the expiration of the server has arrived at the master. We won't process it
|
||||
|
Loading…
x
Reference in New Issue
Block a user