# File passed to org.jruby.Main by bin/hbase. Pollutes jirb with hbase imports # and hbase commands and then loads jirb. Outputs a banner that tells user # where to find help, shell version, and loads up a custom hirb. # TODO: Add 'debug' support (client-side logs show in shell). Add it as # command-line option and as command. # TODO: Interrupt a table creation or a connection to a bad master. Currently # has to time out. Below we've set down the retries for rpc and hbase but # still can be annoying (And there seem to be times when we'll retry for # ever regardless) # TODO: Add support for listing and manipulating catalog tables, etc. # TODO: Encoding; need to know how to go from ruby String to UTF-8 bytes # Run the java magic include and import basic HBase types that will help ease # hbase hacking. include Java # Some goodies for hirb. Should these be left up to the user's discretion? require 'irb/completion' # Add the $HBASE_HOME/bin directory, the location of this script, to the ruby # load path so I can load up my HBase ruby modules $LOAD_PATH.unshift File.dirname($PROGRAM_NAME) # Require formatter require 'Formatter' # See if there are args for this shell. If any, read and then strip from ARGV # so they don't go through to irb. Output shell 'usage' if user types '--help' cmdline_help = < format_width) # TODO, etc. @formatter = Formatter::XHTML.new(STDOUT) # Set logging level to avoid verboseness logger = org.apache.log4j.Logger.getLogger("org.apache.zookeeper") logger.setLevel(logLevel); logger = org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase") logger.setLevel(logLevel); # Require HBase now after setting log levels require 'HBase' # Setup the HBase module. Create a configuration. # Turn off retries in hbase and ipc. Human doesn't want to wait on N retries. 
@configuration = org.apache.hadoop.hbase.HBaseConfiguration.new
@configuration.setInt("hbase.client.retries.number", 5)
@configuration.setInt("ipc.client.connect.max.retries", 3)

# Do lazy create of admin because if we are pointed at bad master, it will hang
# shell on startup trying to connect.
@admin = nil

# Promote hbase constants to be constants of this module so can
# be used bare as keys in 'create', 'alter', etc. To see constants
# in IRB, type 'Object.constants'. Don't promote defaults because
# flattens all types to String. Can be confusing.
#
# constants - list of constant names (Strings or Symbols). Every all-uppercase
#             name that does not contain DEFAULT_ becomes a top-level constant
#             whose value is its own name as a String.
def promoteConstants(constants)
  constants.each do |c|
    name = c.to_s
    # The constants to import are all in uppercase
    next unless name == name.upcase
    next if name =~ /DEFAULT_.*/
    # const_set defines the constant directly instead of eval'ing built source.
    Object.const_set(name, name)
  end
end
promoteConstants(org.apache.hadoop.hbase.HColumnDescriptor.constants)
promoteConstants(org.apache.hadoop.hbase.HTableDescriptor.constants)
promoteConstants(HBase.constants)

# Start of the hbase shell commands.

# General shell methods

# Print help for the hbase shell surgery tools.
#
# NOTE(review): in the reviewed copy the text between the heredoc opener and
# the first "hbase>" prompt was missing. The heading and the close_region
# description below were reconstructed -- confirm against the upstream file.
def tools
  h = <<HERE
HBASE SURGERY TOOLS:
 close_region    Close a single region. Optionally specify regionserver.
                 Examples:

                 hbase> close_region 'REGIONNAME'
                 hbase> close_region 'REGIONNAME', 'REGIONSERVER_IP:PORT'

 compact         Compact all regions in passed table or pass a region row
                 to compact an individual region

 disable_region  Disable a single region

 enable_region   Enable a single region. For example:

                 hbase> enable_region 'REGIONNAME'

 flush           Flush all regions in passed table or pass a region row to
                 flush an individual region. For example:

                 hbase> flush 'TABLENAME'
                 hbase> flush 'REGIONNAME'

 major_compact   Run major compaction on passed table or pass a region row
                 to major compact an individual region

 split           Split table or pass a region row to split individual region

 zk              Low level ZooKeeper surgery tools. Type "zk 'help'" for more
                 information (Yes, you must quote 'help').

 zk_dump         Dump status of HBase cluster as seen by ZooKeeper.

Above commands are for 'experts'-only as misuse can damage an install
HERE
  puts h
end

# Output help. Help used to be a dictionary of name to short and long
# descriptions emitted using Formatters but awkward getting it to show
# nicely on console; instead use a HERE document. Means we can't
# output help other than on console but not an issue at the moment.
# TODO: Add help to the commands themselves rather than keep it distinct
#
# NOTE(review): in the reviewed copy the text between the heredoc opener and
# the first "hbase>" prompt, and the "<RETURN>" token in GENERAL NOTES, were
# missing. The heading, the opening of the 'alter' entry and "<RETURN>" were
# reconstructed -- confirm against the upstream file.
def help
  h = <<HERE
HBASE SHELL COMMANDS:
 alter     Alter column family schema; pass table name and a dictionary
           specifying new column family schema. Dictionaries are described
           below in the GENERAL NOTES section. Dictionary must include name
           of column family to alter. For example,

           To change or add the 'f1' column family in table 't1' from defaults
           to instead keep a maximum of 5 cell VERSIONS, do:
           hbase> alter 't1', {NAME => 'f1', VERSIONS => 5}

           To delete the 'f1' column family in table 't1', do:
           hbase> alter 't1', {NAME => 'f1', METHOD => 'delete'}

           You can also change table-scope attributes like MAX_FILESIZE
           MEMSTORE_FLUSHSIZE and READONLY.

           For example, to change the max size of a family to 128MB, do:
           hbase> alter 't1', {METHOD => 'table_att', MAX_FILESIZE => '134217728'}

 count     Count the number of rows in a table. This operation may take a LONG
           time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a
           counting mapreduce job). Current count is shown every 1000 rows by
           default. Count interval may be optionally specified. Examples:

           hbase> count 't1'
           hbase> count 't1', 100000

 create    Create table; pass table name, a dictionary of specifications per
           column family, and optionally a dictionary of table configuration.
           Dictionaries are described below in the GENERAL NOTES section.
           Examples:

           hbase> create 't1', {NAME => 'f1', VERSIONS => 5}
           hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
           hbase> # The above in shorthand would be the following:
           hbase> create 't1', 'f1', 'f2', 'f3'
           hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, \\
             BLOCKCACHE => true}

 describe  Describe the named table: e.g. "hbase> describe 't1'"

 delete    Put a delete cell value at specified table/row/column and optionally
           timestamp coordinates. Deletes must match the deleted cell's
           coordinates exactly. When scanning, a delete cell suppresses older
           versions. Takes arguments like the 'put' command described below

 deleteall Delete all cells in a given row; pass a table name, row, and optionally
           a column and timestamp

 disable   Disable the named table: e.g. "hbase> disable 't1'"

 drop      Drop the named table. Table must first be disabled. If table has
           more than one region, run a major compaction on .META.:

           hbase> major_compact ".META."

 enable    Enable the named table

 exists    Does the named table exist? e.g. "hbase> exists 't1'"

 exit      Type "hbase> exit" to leave the HBase Shell

 get       Get row or cell contents; pass table name, row, and optionally
           a dictionary of column(s), timestamp and versions. Examples:

           hbase> get 't1', 'r1'
           hbase> get 't1', 'r1', {COLUMN => 'c1'}
           hbase> get 't1', 'r1', {COLUMN => ['c1', 'c2', 'c3']}
           hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1}
           hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1, \\
             VERSIONS => 4}

 list      List all tables in hbase

 put       Put a cell 'value' at specified table/row/column and optionally
           timestamp coordinates. To put a cell value into table 't1' at
           row 'r1' under column 'c1' marked with the time 'ts1', do:

           hbase> put 't1', 'r1', 'c1', 'value', ts1

 tools     Listing of hbase surgery tools

 scan      Scan a table; pass table name and optionally a dictionary of scanner
           specifications. Scanner specifications may include one or more of
           the following: LIMIT, STARTROW, STOPROW, TIMESTAMP, or COLUMNS. If
           no columns are specified, all columns will be scanned. To scan all
           members of a column family, leave the qualifier empty as in
           'col_family:'. Examples:

           hbase> scan '.META.'
           hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
           hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, \\
             STARTROW => 'xyz'}

           For experts, there is an additional option -- CACHE_BLOCKS -- which
           switches block caching for the scanner on (true) or off (false). By
           default it is enabled. Examples:

           hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}

 status    Show cluster status. Can be 'summary', 'simple', or 'detailed'. The
           default is 'summary'. Examples:

           hbase> status
           hbase> status 'simple'
           hbase> status 'summary'
           hbase> status 'detailed'

 shutdown  Shut down the cluster.

 truncate  Disables, drops and recreates the specified table.

 version   Output this HBase version

GENERAL NOTES:
Quote all names in the hbase shell such as table and column names. Don't
forget commas delimit command parameters. Type <RETURN> after entering a
command to run it. Dictionaries of configuration used in the creation and
alteration of tables are ruby Hashes. They look like this:

  {'key1' => 'value1', 'key2' => 'value2', ...}

They are opened and closed with curley-braces. Key/values are delimited by
the '=>' character combination. Usually keys are predefined constants such as
NAME, VERSIONS, COMPRESSION, etc. Constants do not need to be quoted. Type
'Object.constants' to see a (messy) list of all constants in the environment.

In case you are using binary keys or values and need to enter them into the
shell then use double-quotes to make use of hexadecimal or octal notations,
for example:

  hbase> get 't1', "key\\x03\\x3f\\xcd"
  hbase> get 't1', "key\\003\\023\\011"
  hbase> put 't1', "test\\xef\\xff", 'f1:', "\\x01\\x33\\x40"

Using the double-quote notation you can directly use the values output by the
shell for example during a "scan" call.

This HBase shell is the JRuby IRB with the above HBase-specific commands added.
For more on the HBase Shell, see http://wiki.apache.org/hadoop/Hbase/Shell
HERE
  puts h
end

# Output version: prints the version, revision and date reported by VersionInfo.
def version
  info = org.apache.hadoop.hbase.util.VersionInfo
  puts "Version: #{info.getVersion()}, r#{info.getRevision()}, #{info.getDate()}"
end

# Ask the admin object to shut down.
def shutdown
  admin.shutdown
end

# DDL

# Return the shared HBase::Admin, creating it on first use (see the note on
# @admin above for why it is not created eagerly).
def admin()
  @admin ||= HBase::Admin.new(@configuration, @formatter)
end

# Return an HBase::Table for the named table. Create new one each time.
def table(table)
  HBase::Table.new(@configuration, table, @formatter)
end

# Create a table; everything after the table name is handed to the admin as
# a single array.
def create(table, *args)
  admin.create(table, args)
end

def drop(table)
  admin.drop(table)
end

def alter(table, args)
  admin.alter(table, args)
end

# Administration

def list
  admin.list
end

def describe(table)
  admin.describe(table)
end

def enable(table)
  admin.enable(table)
end

def disable(table)
  admin.disable(table)
end

def enable_region(regionName)
  admin.enable_region(regionName)
end

def disable_region(regionName)
  admin.disable_region(regionName)
end

def exists(table)
  admin.exists(table)
end

def truncate(table)
  admin.truncate(table)
end

def close_region(regionName, server = nil)
  admin.close_region(regionName, server)
end

def status(format = 'summary')
  admin.status(format)
end

# The arguments are handed to the admin as a single array.
def zk(*args)
  admin.zk(args)
end

def zk_dump
  admin.zk_dump
end

# CRUD

def get(table, row, args = {})
  table(table).get(row, args)
end

def put(table, row, column, value, timestamp = nil)
  table(table).put(row, column, value, timestamp)
end

def scan(table, args = {})
  table(table).scan(args)
end

def delete(table, row, column,
           timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
  table(table).delete(row, column, timestamp)
end

def deleteall(table, row, column = nil,
              timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
  table(table).deleteall(row, column, timestamp)
end

def count(table, interval = 1000)
  table(table).count(interval)
end

def flush(tableNameOrRegionName)
  admin.flush(tableNameOrRegionName)
end

def compact(tableNameOrRegionName)
  admin.compact(tableNameOrRegionName)
end

def major_compact(tableNameOrRegionName)
  admin.major_compact(tableNameOrRegionName)
end

def split(tableNameOrRegionName)
  admin.split(tableNameOrRegionName)
end

# If script2run, try running it. Will go on to run the shell unless
# script calls 'exit' or 'exit 0' or 'exit errcode'.
load(script2run) if script2run

# Output a banner message that tells users where to go for help
# NOTE(review): the start of this banner was missing from the reviewed copy
# (only "' for list of supported commands." survived); the lead-in was
# reconstructed -- confirm against the upstream file.
puts <<HERE
HBase Shell; enter 'help<RETURN>' for list of supported commands.
HERE
version

require "irb"

module IRB
  # Subclass of IRB so can intercept methods
  class HIRB < Irb
    # Accepts and forwards whatever arguments Irb#initialize takes, because
    # IRB.start below constructs HIRB both with and without arguments.
    def initialize(*args)
      # This is ugly. Our 'help' method above provokes the following message
      # on irb construction: 'irb: warn: can't alias help from irb_help.'
      # Below, we reset the output so its pointed at /dev/null during irb
      # construction just so this message does not come out after we emit
      # the banner. Other attempts at playing with the hash of methods
      # down in IRB didn't seem to work. I think the worst thing that can
      # happen is the shell exiting because of failed IRB construction with
      # no error (though we're not blanking STDERR)
      #
      # Open the sink before the begin block so a failed open surfaces as its
      # own error instead of a NoMethodError on nil in the ensure clause.
      sink = File.open("/dev/null", "w")
      begin
        $stdout = sink
        super
      ensure
        # Restore stdout first so nothing writes to the sink after it closes.
        $stdout = STDOUT
        sink.close
      end
    end

    # Suppress output if last_value is 'nil'
    # Otherwise, when user types help, get ugly 'nil'
    # after all output.
    def output_value
      super unless @context.last_value.nil?
    end
  end

  # Set up IRB under the name 'hbase' and run the read-eval loop in a HIRB
  # instance until :IRB_EXIT is thrown.
  def IRB.start(ap_path = nil)
    $0 = File::basename(ap_path, ".rb") if ap_path

    IRB.setup(ap_path)
    @CONF[:IRB_NAME] = 'hbase'
    @CONF[:AP_NAME] = 'hbase'
    @CONF[:BACK_TRACE_LIMIT] = 0 unless $fullBackTrace

    hirb = @CONF[:SCRIPT] ? HIRB.new(nil, @CONF[:SCRIPT]) : HIRB.new

    @CONF[:IRB_RC].call(hirb.context) if @CONF[:IRB_RC]
    @CONF[:MAIN_CONTEXT] = hirb.context

    catch(:IRB_EXIT) do
      hirb.eval_input
    end
  end
end

IRB.start