hbase/bin/hirb.rb

498 lines
16 KiB
Ruby

# File passed to org.jruby.Main by bin/hbase. Pollutes jirb with hbase imports
# and hbase commands and then loads jirb. Outputs a banner that tells user
# where to find help, shell version, and loads up a custom hirb.
# TODO: Add 'debug' support (client-side logs show in shell). Add it as
# command-line option and as command.
# TODO: Interrupt a table creation or a connection to a bad master. Currently
# has to time out. Below we've set down the retries for rpc and hbase but
# still can be annoying (And there seem to be times when we'll retry for
# ever regardless)
# TODO: Add support for listing and manipulating catalog tables, etc.
# TODO: Encoding; need to know how to go from ruby String to UTF-8 bytes
# Run the java magic include and import basic HBase types that will help ease
# hbase hacking.
include Java
# Some goodies for hirb. Should these be left up to the user's discretion?
require 'irb/completion'
# Hack to turn down zk logging so it don't spew over the shell
# log4j.logger.org.apache.zookeeper=INFO
logger = org.apache.log4j.Logger.getLogger("org.apache.zookeeper")
logger.setLevel(org.apache.log4j.Level::WARN);
# Add the $HBASE_HOME/bin directory, the location of this script, to the ruby
# load path so I can load up my HBase ruby modules
$LOAD_PATH.unshift File.dirname($PROGRAM_NAME)
# Require formatter and hbase
require 'Formatter'
require 'HBase'
# See if there are args for this shell. If any, read and then strip from ARGV
# so they don't go through to irb. Output shell 'usage' if user types '--help'
cmdline_help = <<HERE # HERE document output as shell usage
HBase Shell command-line options:
format Formatter for outputting results: console | html. Default: console
format-width Width of table outputs. Default: 110 characters.
HERE
found = []
format = 'console'
format_width = 110
script2run = nil
for arg in ARGV
if arg =~ /^--format=(.+)/i
format = $1
if format =~ /^html$/i
raise NoMethodError.new("Not yet implemented")
elsif format =~ /^console$/i
# This is default
else
raise ArgumentError.new("Unsupported format " + arg)
end
found.push(arg)
elsif arg =~ /^--format-width=(.+)/i
format_width = $1.to_i
found.push(arg)
elsif arg == '-h' || arg == '--help'
puts cmdline_help
exit
else
# Presume it a script. Save it off for running later below
# after we've set up some environment.
script2run = arg
found.push(arg)
# Presume that any other args are meant for the script.
break
end
end
for arg in found
ARGV.delete(arg)
end
# Presume console format.
# Formatter takes an :output_stream parameter, if you don't want STDOUT.
@formatter = Formatter::Console.new(:format_width => format_width)
# TODO, etc. @formatter = Formatter::XHTML.new(STDOUT)
# Setup the HBase module. Create a configuration.
# Turn off retries in hbase and ipc. Human doesn't want to wait on N retries.
@configuration = org.apache.hadoop.hbase.HBaseConfiguration.new()
@configuration.setInt("hbase.client.retries.number", 5)
@configuration.setInt("ipc.client.connect.max.retries", 3)
# Do lazy create of admin because if we are pointed at bad master, it will hang
# shell on startup trying to connect.
@admin = nil
# Promote hbase constants to be constants of this module so can
# be used bare as keys in 'create', 'alter', etc. To see constants
# in IRB, type 'Object.constants'. Don't promote defaults because
# flattens all types to String. Can be confusing.
def promoteConstants(constants)
# The constants to import are all in uppercase
for c in constants
if c == c.upcase
eval("%s = \"%s\"" % [c, c]) unless c =~ /DEFAULT_.*/
end
end
end
promoteConstants(org.apache.hadoop.hbase.HColumnDescriptor.constants)
promoteConstants(org.apache.hadoop.hbase.HTableDescriptor.constants)
promoteConstants(HBase.constants)
# Start of the hbase shell commands.
# General shell methods
def tools
# Help for hbase shell surgery tools
h = <<HERE
HBASE SURGERY TOOLS:
close_region Close a single region. Optionally specify regionserver.
Examples:
hbase> close_region 'REGIONNAME'
hbase> close_region 'REGIONNAME', 'REGIONSERVER_IP:PORT'
compact Compact all regions in passed table or pass a region row
to compact an individual region
disable_region Disable a single region
enable_region Enable a single region. For example:
hbase> enable_region 'REGIONNAME'
flush Flush all regions in passed table or pass a region row to
flush an individual region. For example:
hbase> flush 'TABLENAME'
hbase> flush 'REGIONNAME'
major_compact Run major compaction on passed table or pass a region row
to major compact an individual region
split Split table or pass a region row to split individual region
zk Low level ZooKeeper surgery tools. Type "zk 'help'" for more
information (Yes, you must quote 'help').
zk_dump Dump status of HBase cluster as seen by ZooKeeper.
Above commands are for 'experts'-only as misuse can damage an install
HERE
puts h
end
def help
# Output help. Help used to be a dictionary of name to short and long
# descriptions emitted using Formatters but awkward getting it to show
# nicely on console; instead use a HERE document. Means we can't
# output help other than on console but not an issue at the moment.
# TODO: Add help to the commands themselves rather than keep it distinct
h = <<HERE
HBASE SHELL COMMANDS:
alter Alter column family schema; pass table name and a dictionary
specifying new column family schema. Dictionaries are described
below in the GENERAL NOTES section. Dictionary must include name
of column family to alter. For example,
To change or add the 'f1' column family in table 't1' from defaults
to instead keep a maximum of 5 cell VERSIONS, do:
hbase> alter 't1', {NAME => 'f1', VERSIONS => 5}
To delete the 'f1' column family in table 't1', do:
hbase> alter 't1', {NAME => 'f1', METHOD => 'delete'}
You can also change table-scope attributes like MAX_FILESIZE
MEMSTORE_FLUSHSIZE and READONLY.
For example, to change the max size of a family to 128MB, do:
hbase> alter 't1', {METHOD => 'table_att', MAX_FILESIZE => '134217728'}
count Count the number of rows in a table. This operation may take a LONG
time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a
counting mapreduce job). Current count is shown every 1000 rows by
default. Count interval may be optionally specified. Examples:
hbase> count 't1'
hbase> count 't1', 100000
create Create table; pass table name, a dictionary of specifications per
column family, and optionally a dictionary of table configuration.
Dictionaries are described below in the GENERAL NOTES section.
Examples:
hbase> create 't1', {NAME => 'f1', VERSIONS => 5}
hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
hbase> # The above in shorthand would be the following:
hbase> create 't1', 'f1', 'f2', 'f3'
hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, \\
BLOCKCACHE => true}
describe Describe the named table: e.g. "hbase> describe 't1'"
delete Put a delete cell value at specified table/row/column and optionally
timestamp coordinates. Deletes must match the deleted cell's
coordinates exactly. When scanning, a delete cell suppresses older
versions. Takes arguments like the 'put' command described below
deleteall Delete all cells in a given row; pass a table name, row, and optionally
a column and timestamp
disable Disable the named table: e.g. "hbase> disable 't1'"
drop Drop the named table. Table must first be disabled. If table has
more than one region, run a major compaction on .META.:
hbase> major_compact ".META."
enable Enable the named table
exists Does the named table exist? e.g. "hbase> exists 't1'"
exit Type "hbase> exit" to leave the HBase Shell
get Get row or cell contents; pass table name, row, and optionally
a dictionary of column(s), timestamp and versions. Examples:
hbase> get 't1', 'r1'
hbase> get 't1', 'r1', {COLUMN => 'c1'}
hbase> get 't1', 'r1', {COLUMN => ['c1', 'c2', 'c3']}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1, \\
VERSIONS => 4}
list List all tables in hbase
put Put a cell 'value' at specified table/row/column and optionally
timestamp coordinates. To put a cell value into table 't1' at
row 'r1' under column 'c1' marked with the time 'ts1', do:
hbase> put 't1', 'r1', 'c1', 'value', ts1
tools Listing of hbase surgery tools
scan Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of
the following: LIMIT, STARTROW, STOPROW, TIMESTAMP, or COLUMNS. If
no columns are specified, all columns will be scanned. To scan all
members of a column family, leave the qualifier empty as in
'col_family:'. Examples:
hbase> scan '.META.'
hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, \\
STARTROW => 'xyz'}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}
status Show cluster status. Can be 'summary', 'simple', or 'detailed'. The
default is 'summary'. Examples:
hbase> status
hbase> status 'simple'
hbase> status 'summary'
hbase> status 'detailed'
shutdown Shut down the cluster.
truncate Disables, drops and recreates the specified table.
version Output this HBase version
GENERAL NOTES:
Quote all names in the hbase shell such as table and column names. Don't
forget commas delimit command parameters. Type <RETURN> after entering a
command to run it. Dictionaries of configuration used in the creation and
alteration of tables are ruby Hashes. They look like this:
{'key1' => 'value1', 'key2' => 'value2', ...}
They are opened and closed with curley-braces. Key/values are delimited by
the '=>' character combination. Usually keys are predefined constants such as
NAME, VERSIONS, COMPRESSION, etc. Constants do not need to be quoted. Type
'Object.constants' to see a (messy) list of all constants in the environment.
In case you are using binary keys or values and need to enter them into the
shell then use double-quotes to make use of hexadecimal or octal notations,
for example:
hbase> get 't1', "key\\x03\\x3f\\xcd"
hbase> get 't1', "key\\003\\023\\011"
hbase> put 't1', "test\\xef\\xff", 'f1:', "\\x01\\x33\\x40"
Using the double-quote notation you can directly use the values output by the
shell for example during a "scan" call.
This HBase shell is the JRuby IRB with the above HBase-specific commands added.
For more on the HBase Shell, see http://wiki.apache.org/hadoop/Hbase/Shell
HERE
puts h
end
def version
# Output version.
puts "Version: #{org.apache.hadoop.hbase.util.VersionInfo.getVersion()},\
r#{org.apache.hadoop.hbase.util.VersionInfo.getRevision()},\
#{org.apache.hadoop.hbase.util.VersionInfo.getDate()}"
end
def shutdown
admin().shutdown()
end
# DDL
def admin()
@admin = HBase::Admin.new(@configuration, @formatter) unless @admin
@admin
end
def table(table)
# Create new one each time
HBase::Table.new(@configuration, table, @formatter)
end
def create(table, *args)
admin().create(table, args)
end
def drop(table)
admin().drop(table)
end
def alter(table, args)
admin().alter(table, args)
end
# Administration
def list
admin().list()
end
def describe(table)
admin().describe(table)
end
def enable(table)
admin().enable(table)
end
def disable(table)
admin().disable(table)
end
def enable_region(regionName)
admin().enable_region(regionName)
end
def disable_region(regionName)
admin().disable_region(regionName)
end
def exists(table)
admin().exists(table)
end
def truncate(table)
admin().truncate(table)
end
def close_region(regionName, server = nil)
admin().close_region(regionName, server)
end
def status(format = 'summary')
admin().status(format)
end
def zk(*args)
admin().zk(args)
end
def zk_dump
admin().zk_dump
end
# CRUD
def get(table, row, args = {})
table(table).get(row, args)
end
def put(table, row, column, value, timestamp = nil)
table(table).put(row, column, value, timestamp)
end
def scan(table, args = {})
table(table).scan(args)
end
def delete(table, row, column,
timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
table(table).delete(row, column, timestamp)
end
def deleteall(table, row, column = nil,
timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
table(table).deleteall(row, column, timestamp)
end
def count(table, interval = 1000)
table(table).count(interval)
end
def flush(tableNameOrRegionName)
admin().flush(tableNameOrRegionName)
end
def compact(tableNameOrRegionName)
admin().compact(tableNameOrRegionName)
end
def major_compact(tableNameOrRegionName)
admin().major_compact(tableNameOrRegionName)
end
def split(tableNameOrRegionName)
admin().split(tableNameOrRegionName)
end
# If script2run, try running it. Will go on to run the shell unless
# script calls 'exit' or 'exit 0' or 'exit errcode'.
load(script2run) if script2run
# Output a banner message that tells users where to go for help
puts <<HERE
HBase Shell; enter 'help<RETURN>' for list of supported commands.
HERE
version
require "irb"
module IRB
# Subclass of IRB so can intercept methods
class HIRB < Irb
def initialize
# This is ugly. Our 'help' method above provokes the following message
# on irb construction: 'irb: warn: can't alias help from irb_help.'
# Below, we reset the output so its pointed at /dev/null during irb
# construction just so this message does not come out after we emit
# the banner. Other attempts at playing with the hash of methods
# down in IRB didn't seem to work. I think the worst thing that can
# happen is the shell exiting because of failed IRB construction with
# no error (though we're not blanking STDERR)
begin
f = File.open("/dev/null", "w")
$stdout = f
super
ensure
f.close()
$stdout = STDOUT
end
end
def output_value
# Suppress output if last_value is 'nil'
# Otherwise, when user types help, get ugly 'nil'
# after all output.
if @context.last_value != nil
super
end
end
end
def IRB.start(ap_path = nil)
$0 = File::basename(ap_path, ".rb") if ap_path
IRB.setup(ap_path)
@CONF[:IRB_NAME] = 'hbase'
@CONF[:AP_NAME] = 'hbase'
if @CONF[:SCRIPT]
hirb = HIRB.new(nil, @CONF[:SCRIPT])
else
hirb = HIRB.new
end
@CONF[:IRB_RC].call(hirb.context) if @CONF[:IRB_RC]
@CONF[:MAIN_CONTEXT] = hirb.context
catch(:IRB_EXIT) do
hirb.eval_input
end
end
end
IRB.start