HBASE-19998 Flakey TestVisibilityLabelsWithDefaultVisLabelService
Only call server.checkIfShouldMoveSystemRegionAsync if a node has been added. Do not call it if only one regionserver in cluster. Make it so ServerCrashProcedure runs before it. Add logging if server.checkIfShouldMoveSystemRegionAsync was responsible for MOVE (Previous was a mystery when it cut in). Previous we'd call it when there was a nodeChildrenChanged. These happen before nodeDeleted. If a server crashed, checkIfShouldMoveSystemRegionAsync could run first, find the server that had not yet registered as crashed, find system tables on it and then try to move them. It would fail because server would not respond to RPC. The region move would then be waiting on the servercrashprocedure to wake it up when done processing but this move had locked the region so SCP couldn't run....
This commit is contained in:
parent
816d860228
commit
50c705dad9
|
@ -486,6 +486,11 @@ public interface MasterServices extends Server {
|
|||
|
||||
public String getRegionServerVersion(final ServerName sn);
|
||||
|
||||
/**
|
||||
* Called when a new RegionServer is added to the cluster.
|
||||
* Checks if new server has a newer version than any existing server and will move system tables
|
||||
* there if so.
|
||||
*/
|
||||
public void checkIfShouldMoveSystemRegionAsync();
|
||||
|
||||
/**
|
||||
|
|
|
@ -104,9 +104,6 @@ public class RegionServerTracker extends ZKListener {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (server.isInitialized()) {
|
||||
server.checkIfShouldMoveSystemRegionAsync();
|
||||
}
|
||||
}
|
||||
|
||||
private void remove(final ServerName sn) {
|
||||
|
@ -115,6 +112,19 @@ public class RegionServerTracker extends ZKListener {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nodeCreated(String path) {
|
||||
if (path.startsWith(watcher.znodePaths.rsZNode)) {
|
||||
String serverName = ZKUtil.getNodeName(path);
|
||||
LOG.info("RegionServer ephemeral node created, adding [" + serverName + "]");
|
||||
if (server.isInitialized()) {
|
||||
// Only call the check to move servers if a RegionServer was added to the cluster; in this
|
||||
// case it could be a server with a new version so it makes sense to run the check.
|
||||
server.checkIfShouldMoveSystemRegionAsync();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nodeDeleted(String path) {
|
||||
if (path.startsWith(watcher.znodePaths.rsZNode)) {
|
||||
|
|
|
@ -455,12 +455,27 @@ public class AssignmentManager implements ServerListener {
|
|||
* If so, move all system table regions to RS with the highest version to keep compatibility.
|
||||
* The reason is, RS in new version may not be able to access RS in old version when there are
|
||||
* some incompatible changes.
|
||||
* <p>This method is called when a new RegionServer is added to cluster only.</p>
|
||||
*/
|
||||
public void checkIfShouldMoveSystemRegionAsync() {
|
||||
// TODO: Fix this thread. If a server is killed and a new one started, this thread thinks that
|
||||
// it should 'move' the system tables from the old server to the new server but
|
||||
// ServerCrashProcedure is on it; and it will take care of the assign without dataloss.
|
||||
if (this.master.getServerManager().countOfRegionServers() <= 1) {
|
||||
return;
|
||||
}
|
||||
// This thread used to run whenever there was a change in the cluster. The ZooKeeper
|
||||
// childrenChanged notification came in before the nodeDeleted message and so this method
|
||||
// cold run before a ServerCrashProcedure could run. That meant that this thread could see
|
||||
// a Crashed Server before ServerCrashProcedure and it could find system regions on the
|
||||
// crashed server and go move them before ServerCrashProcedure had a chance; could be
|
||||
// dataloss too if WALs were not recovered.
|
||||
new Thread(() -> {
|
||||
try {
|
||||
synchronized (checkIfShouldMoveSystemRegionLock) {
|
||||
List<RegionPlan> plans = new ArrayList<>();
|
||||
// TODO: I don't think this code does a good job if all servers in cluster have same
|
||||
// version. It looks like it will schedule unnecessary moves.
|
||||
for (ServerName server : getExcludedServersForSystemTable()) {
|
||||
if (master.getServerManager().isServerDead(server)) {
|
||||
// TODO: See HBASE-18494 and HBASE-18495. Though getExcludedServersForSystemTable()
|
||||
|
@ -471,13 +486,15 @@ public class AssignmentManager implements ServerListener {
|
|||
// handling.
|
||||
continue;
|
||||
}
|
||||
List<RegionInfo> regionsShouldMove = getCarryingSystemTables(server);
|
||||
List<RegionInfo> regionsShouldMove = getSystemTables(server);
|
||||
if (!regionsShouldMove.isEmpty()) {
|
||||
for (RegionInfo regionInfo : regionsShouldMove) {
|
||||
// null value for dest forces destination server to be selected by balancer
|
||||
RegionPlan plan = new RegionPlan(regionInfo, server, null);
|
||||
if (regionInfo.isMetaRegion()) {
|
||||
// Must move meta region first.
|
||||
LOG.info("Async MOVE of {} to newer Server={}",
|
||||
regionInfo.getEncodedName(), server);
|
||||
moveAsync(plan);
|
||||
} else {
|
||||
plans.add(plan);
|
||||
|
@ -485,6 +502,8 @@ public class AssignmentManager implements ServerListener {
|
|||
}
|
||||
}
|
||||
for (RegionPlan plan : plans) {
|
||||
LOG.info("Async MOVE of {} to newer Server={}",
|
||||
plan.getRegionInfo().getEncodedName(), server);
|
||||
moveAsync(plan);
|
||||
}
|
||||
}
|
||||
|
@ -495,7 +514,7 @@ public class AssignmentManager implements ServerListener {
|
|||
}).start();
|
||||
}
|
||||
|
||||
private List<RegionInfo> getCarryingSystemTables(ServerName serverName) {
|
||||
private List<RegionInfo> getSystemTables(ServerName serverName) {
|
||||
Set<RegionStateNode> regions = this.getRegionStates().getServerNode(serverName).getRegions();
|
||||
if (regions == null) {
|
||||
return new ArrayList<>();
|
||||
|
|
|
@ -54,7 +54,6 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
|
|||
public MoveRegionProcedure(final MasterProcedureEnv env, final RegionPlan plan) {
|
||||
super(env, plan.getRegionInfo());
|
||||
this.plan = plan;
|
||||
LOG.info("REMOVE", new Throwable("REMOVE: Just to see who is calling Move!!!"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue