HBASE-21322 Add a scheduleServerCrashProcedure() API to HbckService

Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
tianjingyun 2018-10-27 10:06:56 +08:00 committed by Michael Stack
parent 644ac13850
commit 422e98957b
No known key found for this signature in database
GPG Key ID: 9816C7FC8ACC93D2
7 changed files with 123 additions and 5 deletions

View File

@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface;
@ -154,4 +155,21 @@ public class HBaseHbck implements Hbck {
});
return response.getBypassedList();
}
@Override
public List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
throws IOException {
try {
MasterProtos.ScheduleServerCrashProcedureResponse response =
this.hbck.scheduleServerCrashProcedure(rpcControllerFactory.newController(),
RequestConverter.toScheduleServerCrashProcedureRequest(serverNames));
return response.getPidList();
} catch (ServiceException se) {
LOG.debug(toCommaDelimitedString(
serverNames.stream().map(serverName -> ProtobufUtil.toServerName(serverName).toString())
.collect(Collectors.toList())),
se);
throw new IOException(se);
}
}
}

View File

@ -25,6 +25,8 @@ import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
/**
* Hbck fixup tool APIs. Obtain an instance from {@link ClusterConnection#getHbck()} and call
* {@link #close()} when done.
@ -101,4 +103,7 @@ public interface Hbck extends Abortable, Closeable {
*/
List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override, boolean recursive)
throws IOException;
List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
throws IOException;
}

View File

@ -1897,6 +1897,13 @@ public final class RequestConverter {
setOverride(override).build();
}
public static MasterProtos.ScheduleServerCrashProcedureRequest
toScheduleServerCrashProcedureRequest(List<HBaseProtos.ServerName> serverNames) {
MasterProtos.ScheduleServerCrashProcedureRequest.Builder b =
MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder();
return b.addAllServerName(serverNames).build();
}
private static List<RegionSpecifier> toEncodedRegionNameRegionSpecifiers(
List<String> encodedRegionNames) {
return encodedRegionNames.stream().

View File

@ -1042,6 +1042,14 @@ message BypassProcedureResponse {
repeated bool bypassed = 1;
}
message ScheduleServerCrashProcedureRequest {
repeated ServerName serverName = 1;
}
message ScheduleServerCrashProcedureResponse {
repeated uint64 pid = 1;
}
service HbckService {
/** Update state of the table in meta only*/
rpc SetTableStateInMeta(SetTableStateInMetaRequest)
@ -1068,4 +1076,8 @@ service HbckService {
/** Bypass a procedure to completion, procedure is completed but no actual work is done*/
rpc BypassProcedure(BypassProcedureRequest)
returns(BypassProcedureResponse);
/** Schedule a ServerCrashProcedure to help recover a crash server */
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
returns(ScheduleServerCrashProcedureResponse);
}

View File

@ -18,6 +18,8 @@
*/
package org.apache.hadoop.hbase.master;
import static org.apache.hadoop.hbase.master.MasterWalManager.META_FILTER;
import java.io.IOException;
import java.net.BindException;
import java.net.InetAddress;
@ -32,6 +34,7 @@ import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
@ -65,8 +68,10 @@ import org.apache.hadoop.hbase.ipc.RpcServerInterface;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.locking.LockProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
import org.apache.hadoop.hbase.procedure2.LockType;
@ -97,6 +102,7 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
@ -2424,4 +2430,52 @@ public class MasterRpcServices extends RSRpcServices
throw new ServiceException(e);
}
}
@Override
public MasterProtos.ScheduleServerCrashProcedureResponse scheduleServerCrashProcedure(
RpcController controller, MasterProtos.ScheduleServerCrashProcedureRequest request)
throws ServiceException {
List<HBaseProtos.ServerName> serverNames = request.getServerNameList();
List<Long> pids = new ArrayList<>();
try {
for (HBaseProtos.ServerName serverName : serverNames) {
ServerName server = ProtobufUtil.toServerName(serverName);
if (shouldSubmitSCP(server)) {
ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();
pids.add(procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(),
server, true, containMetaWals(server))));
} else {
pids.add(-1L);
}
}
return MasterProtos.ScheduleServerCrashProcedureResponse.newBuilder().addAllPid(pids).build();
} catch (IOException e) {
throw new ServiceException(e);
}
}
private boolean containMetaWals(ServerName serverName) throws IOException {
Path logDir = new Path(master.getWALRootDir(),
AbstractFSWALProvider.getWALDirectoryName(serverName.toString()));
Path splitDir = logDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
Path checkDir = master.getFileSystem().exists(splitDir) ? splitDir : logDir;
return master.getFileSystem().listStatus(checkDir, META_FILTER).length > 0;
}
private boolean shouldSubmitSCP(ServerName serverName) {
// check if there is already a SCP of this server running
List<Procedure<MasterProcedureEnv>> procedures =
master.getMasterProcedureExecutor().getProcedures();
for (Procedure<MasterProcedureEnv> procedure : procedures) {
if (procedure instanceof ServerCrashProcedure) {
if (serverName.compareTo(((ServerCrashProcedure) procedure).getServerName()) == 0
&& !procedure.isFinished()) {
LOG.info("there is already a SCP of this server {} running, pid {}", serverName,
procedure.getProcId());
return false;
}
}
}
return true;
}
}

View File

@ -1330,13 +1330,14 @@ public class AssignmentManager implements ServerListener {
return 0;
}
public void submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) {
public long submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) {
boolean carryingMeta = isCarryingMeta(serverName);
ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();
procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(), serverName,
shouldSplitWal, carryingMeta));
LOG.debug("Added=" + serverName +
" to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);
long pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(),
serverName, shouldSplitWal, carryingMeta));
LOG.debug("Added=" + serverName
+ " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);
return pid;
}
public void offlineRegion(final RegionInfo regionInfo) {

View File

@ -28,6 +28,7 @@ import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
@ -36,6 +37,7 @@ import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
@ -50,6 +52,8 @@ import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
/**
* Class to test HBaseHbck.
* Spins up the minicluster once at test start and then takes it down afterward.
@ -173,6 +177,23 @@ public class TestHbck {
}
}
@Test
public void testScheduleSCP() throws Exception {
HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TABLE_NAME), Bytes.toBytes("family1"),
true);
ServerName serverName = testRs.getServerName();
Hbck hbck = TEST_UTIL.getHbck();
List<Long> pids =
hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
assertTrue(pids.get(0) > 0);
LOG.info("pid is {}", pids.get(0));
pids = hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
assertTrue(pids.get(0) == -1);
LOG.info("pid is {}", pids.get(0));
}
private void waitOnPids(List<Long> pids) {
for (Long pid: pids) {
while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().