HBASE-21537 Rewrite ServerManager.closeRegionSilentlyAndWait to use AsyncClusterConnection

Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Duo Zhang 2019-01-10 11:47:41 +08:00 committed by zhangduo
parent d005d6f30a
commit a5bcf7d6c7
3 changed files with 39 additions and 28 deletions

View File

@@ -101,7 +101,7 @@ class MasterMetaBootstrap {
RegionState r = MetaTableLocator.getMetaRegionState(zooKeeper, replicaId); RegionState r = MetaTableLocator.getMetaRegionState(zooKeeper, replicaId);
LOG.info("Closing excess replica of meta region " + r.getRegion()); LOG.info("Closing excess replica of meta region " + r.getRegion());
// send a close and wait for a max of 30 seconds // send a close and wait for a max of 30 seconds
ServerManager.closeRegionSilentlyAndWait(master.getClusterConnection(), ServerManager.closeRegionSilentlyAndWait(master.getAsyncClusterConnection(),
r.getServerName(), r.getRegion(), 30000); r.getServerName(), r.getRegion(), 30000);
ZKUtil.deleteNode(zooKeeper, zooKeeper.getZNodePaths().getZNodeForReplica(replicaId)); ZKUtil.deleteNode(zooKeeper, zooKeeper.getZNodePaths().getZNodeForReplica(replicaId));
} }

View File

@@ -48,13 +48,15 @@ import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerMetricsBuilder; import org.apache.hadoop.hbase.ServerMetricsBuilder;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.AsyncClusterConnection;
import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.ipc.HBaseRpcController; import org.apache.hadoop.hbase.ipc.HBaseRpcController;
import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.FutureUtils;
import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
@@ -67,6 +69,7 @@ import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId;
@@ -661,35 +664,39 @@ public class ServerManager {
} }
/** /**
* Contacts a region server and waits up to timeout ms * Contacts a region server and waits up to timeout ms to close the region. This bypasses the
* to close the region. This bypasses the active hmaster. * active hmaster.
*/ */
public static void closeRegionSilentlyAndWait(ClusterConnection connection, public static void closeRegionSilentlyAndWait(AsyncClusterConnection connection,
ServerName server, RegionInfo region, long timeout) throws IOException, InterruptedException { ServerName server, RegionInfo region, long timeout) throws IOException, InterruptedException {
AdminService.BlockingInterface rs = connection.getAdmin(server); AsyncRegionServerAdmin admin = connection.getRegionServerAdmin(server);
HBaseRpcController controller = connection.getRpcControllerFactory().newController();
try { try {
ProtobufUtil.closeRegion(controller, rs, server, region.getRegionName()); FutureUtils.get(
admin.closeRegion(ProtobufUtil.buildCloseRegionRequest(server, region.getRegionName())));
} catch (IOException e) { } catch (IOException e) {
LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e); LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
} }
long expiration = timeout + System.currentTimeMillis(); long expiration = timeout + System.currentTimeMillis();
while (System.currentTimeMillis() < expiration) { while (System.currentTimeMillis() < expiration) {
controller.reset();
try { try {
RegionInfo rsRegion = RegionInfo rsRegion = ProtobufUtil.toRegionInfo(FutureUtils
ProtobufUtil.getRegionInfo(controller, rs, region.getRegionName()); .get(
if (rsRegion == null) return; admin.getRegionInfo(RequestConverter.buildGetRegionInfoRequest(region.getRegionName())))
} catch (IOException ioe) { .getRegionInfo());
if (ioe instanceof NotServingRegionException) // no need to retry again if (rsRegion == null) {
return; return;
LOG.warn("Exception when retrieving regioninfo from: " }
+ region.getRegionNameAsString(), ioe); } catch (IOException ioe) {
if (ioe instanceof NotServingRegionException) {
// no need to retry again
return;
}
LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(),
ioe);
} }
Thread.sleep(1000); Thread.sleep(1000);
} }
throw new IOException("Region " + region + " failed to close within" throw new IOException("Region " + region + " failed to close within" + " timeout " + timeout);
+ " timeout " + timeout);
} }
/** /**

View File

@@ -31,7 +31,9 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.AsyncClusterConnection;
import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
@@ -41,6 +43,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.security.User;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
@@ -143,16 +146,17 @@ public class HBaseFsckRepair {
} }
/** /**
* Contacts a region server and waits up to hbase.hbck.close.timeout ms * Contacts a region server and waits up to hbase.hbck.close.timeout ms (default 120s) to close
* (default 120s) to close the region. This bypasses the active hmaster. * the region. This bypasses the active hmaster.
*/ */
@SuppressWarnings("deprecation") public static void closeRegionSilentlyAndWait(Connection connection, ServerName server,
public static void closeRegionSilentlyAndWait(Connection connection, RegionInfo region) throws IOException, InterruptedException {
ServerName server, RegionInfo region) throws IOException, InterruptedException { long timeout = connection.getConfiguration().getLong("hbase.hbck.close.timeout", 120000);
long timeout = connection.getConfiguration() // this is a bit ugly but it is only used in the old hbck and tests, so I think it is fine.
.getLong("hbase.hbck.close.timeout", 120000); try (AsyncClusterConnection asyncConn = ClusterConnectionFactory
ServerManager.closeRegionSilentlyAndWait((ClusterConnection)connection, server, .createAsyncClusterConnection(connection.getConfiguration(), null, User.getCurrent())) {
region, timeout); ServerManager.closeRegionSilentlyAndWait(asyncConn, server, region, timeout);
}
} }
/** /**