HBASE-17370 Fix or provide shell scripts to drain and decommission region server

Add shell support for the following:
- List decommissioned/draining region servers
- Decommission a list of region servers, optionally offload corresponding regions
- Recommission a region server, optionally load a list of passed regions

Signed-off-by: Guanghao Zhang <zghao@apache.org>
This commit is contained in:
Nihal Jain 2019-01-16 11:50:06 +05:30 committed by Guanghao Zhang
parent cf0cac1041
commit 46a53cac50
7 changed files with 375 additions and 3 deletions

View File

@ -1075,15 +1075,37 @@ module Hbase
@admin.getClusterStatus.getServers.map { |serverName| serverName }
end
#----------------------------------------------------------------------------------------------
# Returns servername corresponding to passed server_name_string
def getServerName(server_name_string)
  # Resolves a server name string to a ServerName.
  #
  # server_name_string - 'host', 'host,port' or a full server name
  #                      ('host,port,starttime' per the shell help texts).
  #
  # Returns the ServerName built from a full name, otherwise the first live
  # region server matching the host (and the port, when given), or nil when
  # no live region server matches.

  # A full server name is converted directly; the live server list is only
  # fetched when a lookup is really needed.
  return ServerName.valueOf(server_name_string) if ServerName.isFullServerName(server_name_string)

  host, port = server_name_string.split(',')
  getRegionServers.find do |sn|
    # The port only takes part in the match when the caller supplied one
    host == sn.hostname && (port.nil? || port == sn.port.to_s)
  end
end
#----------------------------------------------------------------------------------------------
# Returns a list of servernames
def getServerNames(servers)
def getServerNames(servers, should_return_all_if_servers_empty)
regionservers = getRegionServers
servernames = []
if servers.empty?
# if no servers were specified as arguments, get a list of all servers
servernames = regionservers
if should_return_all_if_servers_empty
servernames = regionservers
end
else
# Strings replace with ServerName objects in servers array
i = 0
@ -1322,6 +1344,77 @@ module Hbase
preserve_splits)
end
#----------------------------------------------------------------------------------------------
# List decommissioned RegionServers
def list_decommissioned_regionservers
# Thin wrapper: delegates to the wrapped admin object's
# listDecommissionedRegionServers and returns its result unchanged.
@admin.listDecommissionedRegionServers
end
#----------------------------------------------------------------------------------------------
# Decommission a list of region servers, optionally offload corresponding regions
def decommission_regionservers(host_or_servers, should_offload)
  # Marks the given region server(s) as decommissioned via the admin object.
  #
  # host_or_servers - a single server name (String) or a list of them (Array)
  # should_offload  - true/false, or a String handed to java.lang.Boolean.valueOf
  #
  # Raises ArgumentError for arguments of the wrong type and when none of the
  # requested names resolves to a server.

  # Reject anything that is neither a String nor an Array
  if !host_or_servers.is_a?(Array) && !host_or_servers.is_a?(String)
    raise(ArgumentError,
          "#{host_or_servers.class} of #{host_or_servers.inspect} is not of Array/String type")
  end

  # Reject anything that is neither a real boolean nor a String
  boolean_like = [true, false].include?(should_offload) || should_offload.is_a?(String)
  raise(ArgumentError, "#{should_offload} is not a boolean value") unless boolean_like

  # Wrap a lone name in a list so both input forms are handled the same way
  if host_or_servers.is_a?(Array)
    requested = host_or_servers
  else
    requested = java.util.Arrays.asList(host_or_servers)
  end

  # Resolve the names; an empty result means nothing matched
  resolved = getServerNames(requested, false)
  if resolved.empty?
    raise(ArgumentError,
          "Could not find any server(s) with specified name(s): #{host_or_servers}")
  end

  offload = java.lang.Boolean.valueOf(should_offload)
  @admin.decommissionRegionServers(resolved, offload)
end
#----------------------------------------------------------------------------------------------
# Recommission a region server, optionally load a list of passed regions
def recommission_regionserver(server_name_string, encoded_region_names)
  # Removes the decommission marker from a region server via the admin
  # object, passing along the encoded region names as byte arrays.
  #
  # server_name_string   - String naming the server; resolved with getServerName
  # encoded_region_names - Array of String encoded region names (may be empty)
  #
  # Raises ArgumentError for arguments of the wrong type (including
  # non-String entries in encoded_region_names) and when no server can be
  # found for server_name_string.

  # Fail if server_name_string is not a string
  unless server_name_string.is_a?(String)
    raise(ArgumentError,
          "#{server_name_string.class} of #{server_name_string.inspect} is not of String type")
  end

  # Fail if encoded_region_names is not an array
  unless encoded_region_names.is_a?(Array)
    raise(ArgumentError,
          "#{encoded_region_names.class} of #{encoded_region_names.inspect} is not of Array type")
  end

  # Fail early, and with the same error class as the other argument checks,
  # if any entry is not a string: the byte conversion below is only relied
  # upon for String values.
  invalid_names = encoded_region_names.reject { |region_name| region_name.is_a?(String) }
  unless invalid_names.empty?
    raise(ArgumentError,
          "Encoded region names must be of String type, got: #{invalid_names.inspect}")
  end

  # Convert encoded_region_names from string to bytes (element-wise)
  region_names_in_bytes = encoded_region_names.map(&:to_java_bytes)

  # Retrieve the server name corresponding to the passed server_name_string
  server_name = getServerName(server_name_string)
  # Fail if we can not find a server corresponding to the passed server_name_string
  if server_name.nil?
    raise(ArgumentError,
          "Could not find any server with name #{server_name_string}")
  end

  @admin.recommissionRegionServer(server_name, region_names_in_bytes)
end
#----------------------------------------------------------------------------------------------
# Stop the active Master
def stop_master
@admin.stopMaster

View File

@ -362,6 +362,9 @@ Shell.load_command_group(
stop_master
stop_regionserver
rit
list_decommissioned_regionservers
decommission_regionservers
recommission_regionserver
],
# TODO: remove older hlog_roll command
aliases: {

View File

@ -0,0 +1,49 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
module Shell
module Commands
# Decommission a list of region servers, optionally offload corresponding regions
class DecommissionRegionservers < Command
# Returns the help text for this command, including usage examples.
def help
<<-EOF
Mark region server(s) as decommissioned to prevent additional regions from
getting assigned to them.
Optionally, offload the regions on the servers by passing true.
NOTE: Region offloading is asynchronous.
If there are multiple servers to be decommissioned, decommissioning them
at the same time can prevent wasteful region movements.
Examples:
hbase> decommission_regionservers 'server'
hbase> decommission_regionservers 'server,port'
hbase> decommission_regionservers 'server,port,starttime'
hbase> decommission_regionservers 'server', false
hbase> decommission_regionservers ['server1','server2'], true
EOF
end
# Forwards both arguments to admin.decommission_regionservers.
# server_names is a single server name or a list of names (see the examples
# in the help text); should_offload defaults to false, so regions are only
# offloaded when the caller asks for it.
def command(server_names, should_offload = false)
admin.decommission_regionservers(server_names, should_offload)
end
end
end
end

View File

@ -0,0 +1,42 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
module Shell
  module Commands
    # Shell command that prints the region servers currently marked as decommissioned
    class ListDecommissionedRegionservers < Command
      # Returns the help text for this command.
      def help
        <<-EOF
List region servers marked as decommissioned, which can not be assigned regions.
EOF
      end

      # Emits the header, then one row per server returned by
      # admin.list_decommissioned_regionservers, then a footer that is given
      # the number of servers.
      def command
        formatter.header(['DECOMMISSIONED REGION SERVERS'])
        decommissioned = admin.list_decommissioned_regionservers
        decommissioned.each { |sn| formatter.row([sn.getServerName]) }
        formatter.footer(decommissioned.size)
      end
    end
  end
end

View File

@ -49,7 +49,7 @@ EOF
hosts = args
end
hosts = admin.getServerNames(hosts)
hosts = admin.getServerNames(hosts, true)
if hosts.nil?
puts 'No regionservers available.'

View File

@ -0,0 +1,44 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
module Shell
  module Commands
    # Recommission a region server, optionally load a list of passed regions
    class RecommissionRegionserver < Command
      # Returns the help text for this command, including usage examples.
      def help
        <<-EOF
Remove decommission marker from a region server to allow regions assignments.
Optionally, load regions onto the server by passing a list of encoded region names.
NOTE: Region loading is asynchronous.
Examples:
hbase> recommission_regionserver 'server'
hbase> recommission_regionserver 'server,port'
hbase> recommission_regionserver 'server,port,starttime'
hbase> recommission_regionserver 'server,port,starttime', ['encoded_region_name1', 'encoded_region_name2']
EOF
      end

      # Forwards both arguments to admin.recommission_regionserver.
      # encoded_region_names defaults to an empty list, i.e. no regions are
      # passed along unless the caller names some.
      def command(server_name, encoded_region_names = [])
        admin.recommission_regionserver(server_name, encoded_region_names)
      end
    end
  end
end

View File

@ -291,5 +291,146 @@ module Hbase
drop_test_table(new_table)
end
end
# Shell-level tests for the list_decommissioned_regionservers,
# decommission_regionservers and recommission_regionserver commands.
class CommissioningTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
    # Start every test from a freshly (re)created test table
    @test_name = 'hbase_shell_commissioning_test'
    drop_test_table(@test_name)
    create_test_table(@test_name)
  end

  def teardown
    shutdown
  end

  define_test 'list decommissioned regionservers' do
    server_name = first_region_server.getServerName
    command(:decommission_regionservers, server_name)
    begin
      output = decommissioned_listing
      puts output
      assert output.include?('DECOMMISSIONED REGION SERVERS')
      assert output.include?(server_name)
      assert output.include?('1 row(s)')
    ensure
      # Always recommission so later tests start with no decommissioned servers
      command(:recommission_regionserver, server_name)
      output = decommissioned_listing
      puts output
      assert output.include?('DECOMMISSIONED REGION SERVERS')
      assert !output.include?(server_name)
      assert output.include?('0 row(s)')
    end
  end

  define_test 'decommission regionservers without offload' do
    server_name = first_region_server.getServerName
    command(:decommission_regionservers, server_name, false)
    begin
      assert decommissioned_listing.include?(server_name)
    ensure
      recommission_and_verify(server_name, server_name)
    end
  end

  define_test 'decommission regionservers with server names as list' do
    server_name = first_region_server.getServerName
    command(:decommission_regionservers, [server_name])
    begin
      assert decommissioned_listing.include?(server_name)
    ensure
      recommission_and_verify(server_name, server_name)
    end
  end

  define_test 'decommission regionservers with server host name only' do
    server = first_region_server
    host_name = server.getHostname
    server_name_str = server.getServerName
    command(:decommission_regionservers, host_name)
    begin
      assert decommissioned_listing.include?(server_name_str)
    ensure
      recommission_and_verify(host_name, server_name_str)
    end
  end

  define_test 'decommission regionservers with server host name and port' do
    server = first_region_server
    host_name_and_port = "#{server.getHostname},#{server.getPort}"
    server_name_str = server.getServerName
    command(:decommission_regionservers, host_name_and_port)
    begin
      assert decommissioned_listing.include?(server_name_str)
    ensure
      recommission_and_verify(host_name_and_port, server_name_str)
    end
  end

  define_test 'decommission regionservers with non-existant server name' do
    assert_raise(ArgumentError) do
      command(:decommission_regionservers, 'dummy')
    end
  end

  define_test 'recommission regionserver with non-existant server name' do
    assert_raise(ArgumentError) do
      command(:recommission_regionserver, 'dummy')
    end
  end

  define_test 'decommission regionservers with invalid argument' do
    assert_raise(ArgumentError) do
      command(:decommission_regionservers, 1)
    end
    assert_raise(ArgumentError) do
      command(:decommission_regionservers, {1=>1})
    end
    assert_raise(ArgumentError) do
      command(:decommission_regionservers, 'dummy', 1)
    end
    assert_raise(ArgumentError) do
      command(:decommission_regionservers, 'dummy', {1=>1})
    end
  end

  define_test 'recommission regionserver with invalid argument' do
    assert_raise(ArgumentError) do
      command(:recommission_regionserver, 1)
    end
    assert_raise(ArgumentError) do
      command(:recommission_regionserver, {1=>1})
    end
    assert_raise(ArgumentError) do
      command(:recommission_regionserver, 'dummy', 1)
    end
    assert_raise(ArgumentError) do
      command(:recommission_regionserver, 'dummy', {1=>1})
    end
  end

  private

  # First entry of the server list returned when no names are requested.
  def first_region_server
    admin.getServerNames([], true)[0]
  end

  # Captured stdout of the list_decommissioned_regionservers command.
  def decommissioned_listing
    capture_stdout { command(:list_decommissioned_regionservers) }
  end

  # Recommissions the server identified by +server+ and checks that
  # +listed_name+ no longer appears in the decommissioned listing.
  def recommission_and_verify(server, listed_name)
    command(:recommission_regionserver, server)
    assert !decommissioned_listing.include?(listed_name)
  end
end
# rubocop:enable ClassLength
end