HBASE-18131 Add an hbase shell command to clear deadserver list in ServerManager

Signed-off-by: tedyu <yuzhihong@gmail.com>
This commit is contained in:
andrewcheng 2017-09-13 10:13:12 +08:00 committed by tedyu
parent 7dc0a63c06
commit 815673f7e4
20 changed files with 3316 additions and 142 deletions

View File

@ -1583,4 +1583,18 @@ public interface Admin extends Abortable, Closeable {
SPLIT,
MERGE
}
/**
 * List dead region servers.
 * @return List of dead region servers.
 * @throws IOException if a remote or network exception occurs
 */
List<ServerName> listDeadServers() throws IOException;
/**
 * Clear dead region servers from master.
 * @param servers list of dead region servers to clear.
 * @throws IOException if a remote or network exception occurs
 * @return List of servers that were not cleared
 */
List<ServerName> clearDeadServers(final List<ServerName> servers) throws IOException;
}

View File

@ -1798,6 +1798,21 @@ class ConnectionManager {
MasterProtos.ListProceduresRequest request) throws ServiceException {
return stub.listProcedures(controller, request);
}
@Override
public MasterProtos.ClearDeadServersResponse clearDeadServers(
    RpcController controller,
    MasterProtos.ClearDeadServersRequest request) throws ServiceException {
  // Forward the call unchanged to stub and hand its response back.
  final MasterProtos.ClearDeadServersResponse response =
      stub.clearDeadServers(controller, request);
  return response;
}
@Override
public MasterProtos.ListDeadServersResponse listDeadServers(
    RpcController controller,
    MasterProtos.ListDeadServersRequest request) throws ServiceException {
  // Forward the call unchanged to stub and hand its response back.
  final MasterProtos.ListDeadServersResponse response =
      stub.listDeadServers(controller, request);
  return response;
}
@Override
public AddColumnResponse addColumn(RpcController controller, AddColumnRequest request)
throws ServiceException {

View File

@ -102,6 +102,7 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AbortProcedureReq
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AbortProcedureResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AddColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AssignRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ClearDeadServersRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateTableResponse;
@ -135,6 +136,7 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshot
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListDeadServersRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListProceduresRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceRequest;
@ -4899,6 +4901,34 @@ public class HBaseAdmin implements Admin {
});
}
/**
 * Asks the master for its dead region servers.
 * @return the servers returned by the master, converted from their protobuf form
 * @throws IOException if executing the master call fails
 */
@Override
public List<ServerName> listDeadServers() throws IOException {
  MasterCallable<List<ServerName>> listCall =
      new MasterCallable<List<ServerName>>(getConnection()) {
        @Override
        public List<ServerName> call(int callTimeout) throws ServiceException {
          // The request is built straight from an untouched builder.
          ListDeadServersRequest emptyRequest = ListDeadServersRequest.newBuilder().build();
          return ProtobufUtil.toServerNameList(
              master.listDeadServers(null, emptyRequest).getServerNameList());
        }
      };
  return executeCallable(listCall);
}
/**
 * Asks the master to clear the given servers from its dead server list.
 * @param servers dead region servers to clear; must be neither null nor empty
 * @return the servers returned by the master, i.e. those that were not cleared
 * @throws IllegalArgumentException if servers is null or empty
 * @throws IOException if a remote or network exception occurs
 */
@Override
public List<ServerName> clearDeadServers(final List<ServerName> servers) throws IOException {
  if (servers == null || servers.isEmpty()) {
    throw new IllegalArgumentException("servers cannot be null or empty");
  }
  return executeCallable(new MasterCallable<List<ServerName>>(getConnection()) {
    // Only the master RPC can raise a checked exception here, so declare exactly that
    // (matches the sibling listDeadServers callable) instead of the blanket Exception.
    @Override
    public List<ServerName> call(int callTimeout) throws ServiceException {
      ClearDeadServersRequest req = RequestConverter.buildClearDeadServersRequest(servers);
      return ProtobufUtil.toServerNameList(
          master.clearDeadServers(null, req).getServerNameList());
    }
  });
}
/** Returns this instance's rpcControllerFactory. */
private RpcControllerFactory getRpcControllerFactory() {
return rpcControllerFactory;
}

View File

@ -396,6 +396,20 @@ public final class ProtobufUtil {
}
return ServerName.valueOf(hostName, port, startCode);
}
/**
 * Translates every protocol buffer ServerName in the given list into a ServerName,
 * keeping the order of the input.
 * @param proto protocol buffer ServerName entries to convert
 * @return a new list holding the converted ServerName of each entry
 */
public static List<ServerName> toServerNameList(
    List<HBaseProtos.ServerName> proto) {
  final List<ServerName> converted = new ArrayList<ServerName>();
  for (final HBaseProtos.ServerName entry : proto) {
    final ServerName serverName = toServerName(entry);
    converted.add(serverName);
  }
  return converted;
}
/**
* Get HTableDescriptor[] from GetTableDescriptorsResponse protobuf

View File

@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AddColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AssignRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.BalanceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ClearDeadServersRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteTableRequest;
@ -1810,6 +1811,14 @@ public final class RequestConverter {
return builder.build();
}
/**
 * Creates a protocol buffer ClearDeadServersRequest carrying the given servers.
 * @param deadServers the servers to place in the request, in iteration order
 * @return a ClearDeadServersRequest with one server_name entry per given server
 */
public static ClearDeadServersRequest buildClearDeadServersRequest(List<ServerName> deadServers) {
  final ClearDeadServersRequest.Builder request = ClearDeadServersRequest.newBuilder();
  for (final ServerName deadServer : deadServers) {
    final HBaseProtos.ServerName pbName = ProtobufUtil.toServerName(deadServer);
    request.addServerName(pbName);
  }
  return request.build();
}
private static MasterProtos.MasterSwitchType convert(Admin.MasterSwitchType switchType) {
switch (switchType) {
case SPLIT:

View File

@ -556,6 +556,21 @@ message SecurityCapabilitiesResponse {
repeated Capability capabilities = 1;
}
/** Request for the ListDeadServers rpc; it carries no fields. */
message ListDeadServersRequest {
}
/** Response of the ListDeadServers rpc. */
message ListDeadServersResponse {
// One entry per dead region server reported by the master.
repeated ServerName server_name = 1;
}
/** Request for the ClearDeadServers rpc. */
message ClearDeadServersRequest {
// The dead region servers the caller wants cleared.
repeated ServerName server_name = 1;
}
/** Response of the ClearDeadServers rpc. */
message ClearDeadServersResponse {
// The servers from the request that were NOT cleared; empty when all were cleared.
repeated ServerName server_name = 1;
}
service MasterService {
/** Used by the client to get the number of regions that have received the updated schema */
rpc GetSchemaAlterStatus(GetSchemaAlterStatusRequest)
@ -848,4 +863,13 @@ service MasterService {
/** returns a list of procedures */
rpc ListProcedures(ListProceduresRequest)
returns(ListProceduresResponse);
/** Clears dead servers from the master; the response lists the servers that were not cleared. */
rpc ClearDeadServers(ClearDeadServersRequest)
returns(ClearDeadServersResponse);
/** Returns the list of dead region servers. */
rpc ListDeadServers(ListDeadServersRequest)
returns(ListDeadServersResponse);
}

View File

@ -64,6 +64,26 @@ public class BaseMasterAndRegionObserver extends BaseRegionObserver
HRegionInfo regionA, HRegionInfo regionB) throws IOException {
}
@Override
public void preListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void postListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void preClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void preCreateTableHandler(
final ObserverContext<MasterCoprocessorEnvironment> ctx,

View File

@ -75,6 +75,26 @@ public class BaseMasterObserver implements MasterObserver {
HRegionInfo regionA, HRegionInfo regionB) throws IOException {
}
@Override
public void preListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void postListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void preClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this base implementation does nothing for this hook.
}
@Override
public void preDeleteTable(ObserverContext<MasterCoprocessorEnvironment> ctx,
TableName tableName) throws IOException {

View File

@ -1044,4 +1044,25 @@ public interface MasterObserver extends Coprocessor {
*/
void postDispatchMerge(final ObserverContext<MasterCoprocessorEnvironment> c,
final HRegionInfo regionA, final HRegionInfo regionB) throws IOException;
/**
 * Called before the master lists dead region servers.
 * @param ctx the observer context for the master coprocessor environment
 * @throws IOException if the hook implementation fails
 */
void preListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException;
/**
 * Called after the master has listed dead region servers.
 * @param ctx the observer context for the master coprocessor environment
 * @throws IOException if the hook implementation fails
 */
void postListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException;
/**
 * Called before the master clears dead region servers.
 * @param ctx the observer context for the master coprocessor environment
 * @throws IOException if the hook implementation fails
 */
void preClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException;
/**
 * Called after the master has handled a request to clear dead region servers.
 * @param ctx the observer context for the master coprocessor environment
 * @throws IOException if the hook implementation fails
 */
void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException;
}

View File

@ -202,4 +202,17 @@ public class DeadServer {
return o1.getSecond().compareTo(o2.getSecond());
}
};
/**
 * Removes the specified server from the dead servers tracked by this object.
 * @param deadServerName the dead server name
 * @return true if removing the server yielded an existing entry, false if there was none
 */
public synchronized boolean removeDeadServer(final ServerName deadServerName) {
  // remove() hands back null when nothing was stored for this server.
  return deadServers.remove(deadServerName) != null;
}
}

View File

@ -1172,6 +1172,47 @@ public class MasterCoprocessorHost
});
}
/**
 * Runs the preListDeadServers hook through execOperation.
 * When no coprocessors are loaded, execOperation is handed null instead of an operation.
 * @throws IOException propagated from execOperation
 */
public void preListDeadServers() throws IOException {
  CoprocessorOperation operation = null;
  if (!coprocessors.isEmpty()) {
    operation = new CoprocessorOperation() {
      @Override
      public void call(MasterObserver observer,
          ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException {
        observer.preListDeadServers(ctx);
      }
    };
  }
  execOperation(operation);
}
/**
 * Runs the postListDeadServers hook through execOperation.
 * When no coprocessors are loaded, execOperation is handed null instead of an operation.
 * @throws IOException propagated from execOperation
 */
public void postListDeadServers() throws IOException {
  CoprocessorOperation operation = null;
  if (!coprocessors.isEmpty()) {
    operation = new CoprocessorOperation() {
      @Override
      public void call(MasterObserver observer,
          ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException {
        observer.postListDeadServers(ctx);
      }
    };
  }
  execOperation(operation);
}
/**
 * Runs the preClearDeadServers hook through execOperation.
 * When no coprocessors are loaded, execOperation is handed null instead of an operation.
 * @throws IOException propagated from execOperation
 */
public void preClearDeadServers() throws IOException {
  CoprocessorOperation operation = null;
  if (!coprocessors.isEmpty()) {
    operation = new CoprocessorOperation() {
      @Override
      public void call(MasterObserver observer,
          ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException {
        observer.preClearDeadServers(ctx);
      }
    };
  }
  execOperation(operation);
}
/**
 * Runs the postClearDeadServers hook through execOperation.
 * When no coprocessors are loaded, execOperation is handed null instead of an operation.
 * @throws IOException propagated from execOperation
 */
public void postClearDeadServers() throws IOException {
  CoprocessorOperation operation = null;
  if (!coprocessors.isEmpty()) {
    operation = new CoprocessorOperation() {
      @Override
      public void call(MasterObserver observer,
          ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException {
        observer.postClearDeadServers(ctx);
      }
    };
  }
  execOperation(operation);
}
private static abstract class CoprocessorOperation
extends ObserverContext<MasterCoprocessorEnvironment> {
public CoprocessorOperation() {

View File

@ -69,6 +69,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AssignRegionReque
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AssignRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.BalanceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.BalanceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ClearDeadServersRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ClearDeadServersResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateNamespaceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateTableRequest;
@ -123,6 +125,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshot
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListDeadServersRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListDeadServersResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListNamespaceDescriptorsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListProceduresRequest;
@ -1136,6 +1140,66 @@ public class MasterRpcServices extends RSRpcServices
}
}
/**
 * RPC endpoint that returns the dead servers known to the master's server manager.
 * The preListDeadServers and postListDeadServers coprocessor hooks run around the lookup
 * whenever master.cpHost is set.
 * @param controller RPC controller; not read by this method
 * @param request the list request; not read by this method
 * @return a response with one server_name entry per dead server
 * @throws ServiceException wrapping any IOException raised while serving the call
 */
@Override
public ListDeadServersResponse listDeadServers(RpcController controller,
    ListDeadServersRequest request) throws ServiceException {
  LOG.debug(master.getClientIdAuditPrefix() + " list dead region servers.");
  ListDeadServersResponse.Builder builder = ListDeadServersResponse.newBuilder();
  try {
    master.checkInitialized();
    if (master.cpHost != null) {
      master.cpHost.preListDeadServers();
    }

    // Iterate over the copy of the names handed out by the dead server tracker.
    for (ServerName deadServer
        : master.getServerManager().getDeadServers().copyServerNames()) {
      builder.addServerName(ProtobufUtil.toServerName(deadServer));
    }

    if (master.cpHost != null) {
      master.cpHost.postListDeadServers();
    }
  } catch (IOException ioe) {
    throw new ServiceException(ioe);
  }
  return builder.build();
}
/**
 * RPC endpoint that removes the requested servers from the master's dead server list.
 * While any dead server is still being processed nothing is removed and every requested
 * server is echoed back. Otherwise each server is removed individually and only those
 * whose removal returned false are echoed back. The preClearDeadServers and
 * postClearDeadServers coprocessor hooks run around the work whenever master.cpHost is set.
 * @param controller RPC controller; not read by this method
 * @param request carries the servers to clear
 * @return a response listing the servers that were not cleared
 * @throws ServiceException wrapping any IOException raised while serving the call
 */
@Override
public ClearDeadServersResponse clearDeadServers(RpcController controller,
    ClearDeadServersRequest request) throws ServiceException {
  LOG.debug(master.getClientIdAuditPrefix() + " clear dead region servers.");
  ClearDeadServersResponse.Builder builder = ClearDeadServersResponse.newBuilder();
  try {
    master.checkInitialized();
    if (master.cpHost != null) {
      master.cpHost.preClearDeadServers();
    }

    if (!master.getServerManager().areDeadServersInProgress()) {
      for (HBaseProtos.ServerName requested : request.getServerNameList()) {
        boolean removed = master.getServerManager().getDeadServers()
            .removeDeadServer(ProtobufUtil.toServerName(requested));
        if (!removed) {
          // Report back every server that could not be removed.
          builder.addServerName(requested);
        }
      }
    } else {
      LOG.debug("Some dead server is still under processing, won't clear the dead server list");
      builder.addAllServerName(request.getServerNameList());
    }

    if (master.cpHost != null) {
      master.cpHost.postClearDeadServers();
    }
  } catch (IOException ioe) {
    throw new ServiceException(ioe);
  }
  return builder.build();
}
@Override
public ListNamespaceDescriptorsResponse listNamespaceDescriptors(RpcController c,
ListNamespaceDescriptorsRequest request) throws ServiceException {

View File

@ -2604,6 +2604,24 @@ public class AccessController extends BaseMasterAndRegionObserver
Action.ADMIN);
}
// No-op: no permission check is performed here before dead servers are listed.
@Override
public void preListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException { }
// No-op: nothing is done after dead servers are listed.
@Override
public void postListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException { }
// Runs the ADMIN permission check (via requirePermission) before dead servers are cleared.
@Override
public void preClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
requirePermission("clearDeadServers", Action.ADMIN);
}
// No-op: nothing is done after dead servers are cleared.
@Override
public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException { }
@Override
public void preMerge(ObserverContext<RegionServerCoprocessorEnvironment> ctx, Region regionA,
Region regionB) throws IOException {

View File

@ -270,6 +270,26 @@ public class TestMasterObserver {
postDispatchMergeCalled = true;
}
@Override
public void preListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this test observer does not record this hook.
}
@Override
public void postListDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this test observer does not record this hook.
}
@Override
public void preClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this test observer does not record this hook.
}
@Override
public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
throws IOException {
// No-op: this test observer does not record this hook.
}
/** Returns true only when both preDispatchMergeCalled and postDispatchMergeCalled are set. */
public boolean wasDispatchMergeCalled() {
return preDispatchMergeCalled && postDispatchMergeCalled;
}

View File

@ -150,5 +150,23 @@ public class TestDeadServer {
Assert.assertTrue(d.isEmpty());
}
/** Exercises DeadServer.removeDeadServer for present and absent servers. */
@Test
public void testClearDeadServer() {
  DeadServer tracker = new DeadServer();

  // Two servers added, then removed one at a time until the tracker is empty.
  tracker.add(hostname123);
  tracker.add(hostname1234);
  Assert.assertEquals(2, tracker.size());

  tracker.removeDeadServer(hostname123);
  Assert.assertEquals(1, tracker.size());
  tracker.removeDeadServer(hostname1234);
  Assert.assertTrue(tracker.isEmpty());

  // Removing a server that was never added reports false and leaves the tracker untouched.
  tracker.add(hostname1234);
  Assert.assertFalse(tracker.removeDeadServer(hostname123_2));
  Assert.assertEquals(1, tracker.size());

  // Removing a tracked server reports true.
  Assert.assertTrue(tracker.removeDeadServer(hostname1234));
  Assert.assertTrue(tracker.isEmpty());
}
}

View File

@ -1170,5 +1170,27 @@ module Hbase
set_user_metadata(htd, arg.delete(METADATA)) if arg[METADATA]
set_descriptor_config(htd, arg.delete(CONFIGURATION)) if arg[CONFIGURATION]
end
#----------------------------------------------------------------------------------------------
# list dead region servers
# Returns the result of @admin.listDeadServers converted to a Ruby array.
def list_deadservers
@admin.listDeadServers.to_a
end
#----------------------------------------------------------------------------------------------
# clear dead region servers
# dead_servers: (possibly nested) array of server name strings; when it holds no names,
# every server returned by list_deadservers is submitted for clearing.
# Returns a Ruby array of the servers that were not cleared.
def clear_deadservers(dead_servers)
  # Flatten params array
  names = dead_servers.flatten.compact
  if names.empty?
    servers = list_deadservers
    # Nothing is dead, so nothing can fail to clear. The Java clearDeadServers call
    # throws IllegalArgumentException for an empty list, so skip it altogether.
    return [] if servers.empty?
  else
    servers = java.util.ArrayList.new
    names.each { |name| servers.add(ServerName.valueOf(name)) }
  end
  @admin.clearDeadServers(servers).to_a
end
end
end

View File

@ -344,6 +344,8 @@ Shell.load_command_group(
trace
splitormerge_switch
splitormerge_enabled
list_deadservers
clear_deadservers
],
# TODO remove older hlog_roll command
:aliases => {

View File

@ -0,0 +1,52 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
module Shell
  module Commands
    # Shell command that asks the admin to clear dead region servers and reports
    # any servers that could not be cleared.
    class ClearDeadservers < Command
      # Usage text shown for this command.
      def help
        <<-EOF
Clear the dead region servers that are never used.
Examples:
Clear all dead region servers:
hbase> clear_deadservers
Clear the specified dead region servers
hbase> clear_deadservers 'host187.example.com,60020,1289493121758'
or
hbase> clear_deadservers 'host187.example.com,60020,1289493121758',
'host188.example.com,60020,1289493121758'
        EOF
      end

      # dead_servers: zero or more server name strings. Prints 'true' when the admin
      # returns no servers; otherwise prints each returned (not cleared) server.
      def command(*dead_servers)
        not_cleared = admin.clear_deadservers(dead_servers)
        return formatter.row(['true']) if not_cleared.size <= 0

        formatter.row(['Some dead server clear failed'])
        formatter.row(['SERVERNAME'])
        not_cleared.each { |name| formatter.row([name.toString]) }
        formatter.footer(not_cleared.size)
      end
    end
  end
end

View File

@ -0,0 +1,43 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
module Shell
  module Commands
    # Shell command that prints the dead region servers returned by the admin.
    class ListDeadservers < Command
      # Usage text shown for this command.
      def help
        <<-EOF
List all dead region servers in hbase
Examples:
hbase> list_deadservers
        EOF
      end

      # Prints a SERVERNAME header, one row per dead server, then the footer.
      def command
        formatter.header(['SERVERNAME'])
        dead = admin.list_deadservers
        dead.each { |name| formatter.row([name.toString]) }
        formatter.footer(dead.size)
      end
    end
  end
end