HBASE-19815 Flakey TestAssignmentManager.testAssignWithRandExec (Part Two).
Part One cleaned up a ClassCastException. Part Two adds the ServerCrashProcedure#handleRIT behavior to RecoverMetaProcedure. It also adds debug logging to the test.
This commit is contained in:
parent
646770dd51
commit
581fabe7b2
|
@ -28,6 +28,8 @@ import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
|||
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
||||
import org.apache.hadoop.hbase.master.MasterServices;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
|
||||
|
@ -126,17 +128,17 @@ public class RecoverMetaProcedure
|
|||
RegionInfoBuilder.FIRST_META_REGIONINFO, this.replicaId);
|
||||
|
||||
AssignProcedure metaAssignProcedure;
|
||||
AssignmentManager am = master.getAssignmentManager();
|
||||
if (failedMetaServer != null) {
|
||||
LOG.info(this + "; Assigning meta with new plan. previous meta server=" +
|
||||
failedMetaServer);
|
||||
metaAssignProcedure = master.getAssignmentManager().createAssignProcedure(hri);
|
||||
handleRIT(env, hri, this.failedMetaServer);
|
||||
LOG.info(this + "; Assigning meta with new plan; previous server=" + failedMetaServer);
|
||||
metaAssignProcedure = am.createAssignProcedure(hri);
|
||||
} else {
|
||||
// get server carrying meta from zk
|
||||
ServerName metaServer =
|
||||
MetaTableLocator.getMetaRegionState(master.getZooKeeper()).getServerName();
|
||||
LOG.info(this + "; Retaining meta assignment to server=" + metaServer);
|
||||
metaAssignProcedure =
|
||||
master.getAssignmentManager().createAssignProcedure(hri, metaServer);
|
||||
metaAssignProcedure = am.createAssignProcedure(hri, metaServer);
|
||||
}
|
||||
|
||||
addChildProcedure(metaAssignProcedure);
|
||||
|
@ -152,6 +154,32 @@ public class RecoverMetaProcedure
|
|||
return Flow.HAS_MORE_STATE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the region stuck assigning to this failedMetaServer? If so, cancel the call
|
||||
* just as we do over in ServerCrashProcedure#handleRIT except less to do here; less context
|
||||
* to carry.
* <p>Looks up the RegionTransitionProcedure registered for <code>ri</code>. If there is none,
* or if it reports a server other than <code>crashedServerName</code>, nothing is changed.
* If it reports exactly <code>crashedServerName</code>, its remote call is failed with a
* ServerCrashException carrying this procedure's id.
* @param env master procedure environment; supplies the AssignmentManager
* @param ri region to check for an in-flight transition
* @param crashedServerName server the in-flight transition must be against to be failed
|
||||
*/
|
||||
private void handleRIT(MasterProcedureEnv env, RegionInfo ri, ServerName crashedServerName) {
|
||||
AssignmentManager am = env.getAssignmentManager();
|
||||
RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(ri);
|
||||
if (rtp == null) {
|
||||
return; // Nothing to do. Not in RIT.
|
||||
}
|
||||
// Make sure the RIT is against this crashed server. In the case where there are many
|
||||
// processings of a crashed server -- backed up for whatever reason (slow WAL split)
|
||||
// -- then a previous SCP may have already failed an assign, etc., and it may have a
|
||||
// new location target; DO NOT fail these else we make for assign flux.
|
||||
ServerName rtpServerName = rtp.getServer(env);
|
||||
if (rtpServerName == null) {
|
||||
// No server recorded on the transition: only warn, do not fail the call.
LOG.warn("RIT with ServerName null! " + rtp);
|
||||
} else if (rtpServerName.equals(crashedServerName)) {
|
||||
// The transition is against the crashed server: log it, then report the remote call
// as failed, tagging the failure with this procedure's id and the crashed server.
LOG.info("pid=" + getProcId() + " found RIT " + rtp + "; " +
|
||||
rtp.getRegionState(env).toShortString());
|
||||
rtp.remoteCallFailed(env, crashedServerName,
|
||||
new ServerCrashException(getProcId(), crashedServerName));
|
||||
}
|
||||
// Any other server name falls through untouched (see the note above on assign flux).
}
|
||||
|
||||
@Override
|
||||
protected void rollbackState(MasterProcedureEnv env,
|
||||
MasterProcedureProtos.RecoverMetaState recoverMetaState)
|
||||
|
|
|
@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue;
|
|||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InterruptedIOException;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.util.NavigableMap;
|
||||
import java.util.Random;
|
||||
|
@ -206,7 +207,7 @@ public class TestAssignmentManager {
|
|||
rsDispatcher.setMockRsExecutor(new RandRsExecutor());
|
||||
// Loop a bunch of times so we hit various combos of exceptions.
|
||||
for (int i = 0; i < 10; i++) {
|
||||
LOG.info("" + i);
|
||||
LOG.info("ROUND=" + i);
|
||||
AssignProcedure proc = am.createAssignProcedure(hri);
|
||||
waitOnFuture(submitProcedure(proc));
|
||||
}
|
||||
|
@ -445,6 +446,12 @@ public class TestAssignmentManager {
|
|||
return future.get(5, TimeUnit.SECONDS);
|
||||
} catch (ExecutionException e) {
|
||||
LOG.info("ExecutionException", e);
|
||||
Exception ee = (Exception)e.getCause();
|
||||
if (ee instanceof InterruptedIOException) {
|
||||
for (Procedure p: this.master.getMasterProcedureExecutor().getProcedures()) {
|
||||
LOG.info(p.toStringDetails());
|
||||
}
|
||||
}
|
||||
throw (Exception)e.getCause();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue