HBASE-2279 Hbase Shell does not have any tests

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@922110 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-03-12 01:36:04 +00:00
parent 20319e9eab
commit 4770ea178a
51 changed files with 2770 additions and 1234 deletions

2
.gitignore vendored
View File

@ -9,4 +9,6 @@
/contrib/stargate/target/
/contrib/transactional/target/
/core/target/
/core/build/
/core/test/
*.iml

View File

@ -427,6 +427,7 @@ Release 0.21.0 - Unreleased
of the table (Kay Kay via Stack)
HBASE-2309 Add apache releases to pom (list of ) repositories
(Kay Kay via Stack)
HBASE-2279 Hbase Shell does not have any tests (Alexey Kovyrin via Stack)
NEW FEATURES
HBASE-1961 HBase EC2 scripts

View File

@ -1,145 +0,0 @@
# Results formatter
module Formatter
  # Base class for results formatting.
  #
  # Writes rows of text to an output stream, wrapping one- and two-column
  # rows so they fit inside a configured width, and counts the rows written
  # since the last header.
  class Formatter
    # Number of rows written by #row since the last call to #header.
    attr_reader :rowCount

    # True when +obj+ is the Kernel module (the default output target).
    def is_kernel?(obj)
      obj.kind_of?(Module) && obj.name == "Kernel"
    end

    # Takes an output stream and a print width.
    #
    # opts:: Hash with optional keys :output_stream (an IO, or Kernel;
    #        default Kernel) and :format_width (default 100). For
    #        convenience a bare stream may be passed instead of a Hash,
    #        e.g. Console.new(STDOUT).
    #
    # Raises TypeError when the output stream is neither an IO nor Kernel.
    def initialize(opts={})
      # Accept the positional-stream calling style as well as the options Hash.
      opts = {:output_stream => opts} unless opts.kind_of?(Hash)
      defaults = {:output_stream => Kernel, :format_width => 100}
      options = defaults.merge(opts)
      @out = options[:output_stream]
      # kind_of? (not instance_of?) so IO subclasses such as File are accepted.
      unless @out.kind_of?(IO) || is_kernel?(@out)
        raise TypeError.new("Type %s of parameter %s is not IO" % [@out.class, @out])
      end
      @maxWidth = options[:format_width]
      @rowCount = 0
    end

    # Output the header row (if any) and restart the row count.
    def header(args = [], widths = [])
      row(args, false, widths) if args.length > 0
      @rowCount = 0
    end

    # Output a row.
    # Inset is whether or not to offset row by a space.
    #
    # args::   a String (printed as one padded line; not counted as a row),
    #          or an Array of one, two or more cells.
    # widths:: optional column widths, as percentages of the format width,
    #          used for two-column rows.
    def row(args = [], inset = true, widths = [])
      # Print out nothing
      return if !args || args.length == 0
      if args.class == String
        output(@maxWidth, args)
        @out.puts
        return
      end
      # TODO: Look at the type. Is it RowResult?
      if args.length == 1
        split(@maxWidth, dump(args[0])).each do |line|
          output(@maxWidth, line)
          @out.puts
        end
      elsif args.length == 2
        col1width = (!widths || widths.length == 0) ? @maxWidth / 4 : @maxWidth * widths[0] / 100
        col2width = (!widths || widths.length < 2) ? @maxWidth - col1width - 2 : @maxWidth * widths[1] / 100 - 2
        splits1 = split(col1width, dump(args[0]))
        splits2 = split(col2width, dump(args[1]))
        biggest = [splits1.length, splits2.length].max
        index = 0
        while index < biggest
          # Inset by one space if inset is set.
          @out.print(" ") if inset
          output(col1width, splits1[index])
          # Add extra space so second column lines up w/ second column output
          @out.print(" ") unless inset
          @out.print(" ")
          output(col2width, splits2[index])
          index += 1
          @out.puts
        end
      else
        # Print a space to set off multi-column rows. Everything goes to
        # @out so the row lands on the configured stream, like the other
        # row shapes.
        @out.print ' '
        args.each_with_index do |cell, position|
          @out.print " " unless position == 0
          @out.print cell
        end
        @out.puts
      end
      @rowCount += 1
    end

    # Cut +str+ into consecutive pieces of at most +width+ characters.
    def split(width, str)
      # A non-positive width would never advance the index below.
      width = 1 if width < 1
      result = []
      index = 0
      while index < str.length do
        result << str.slice(index, width)
        index += width
      end
      result
    end

    # Prepare a cell for splitting. Integers and nil are turned into
    # strings so that #split can measure and slice them; anything else is
    # returned untouched.
    def dump(str)
      return str.to_s if str.nil? || str.kind_of?(Integer)
      str
    end

    # Print +str+ left-justified and padded to +width+ characters.
    def output(width, str)
      # Make up a spec for printf
      spec = "%%-%ds" % width
      @out.printf(spec, str)
    end

    # Print the row count and elapsed time.
    # Only output elapsed time and row count if startTime passed.
    # rowCount defaults to the number of rows written since the header.
    def footer(startTime = nil, rowCount = nil)
      rowCount = @rowCount if not rowCount
      return if not startTime
      @out.puts("%d row(s) in %.4f seconds" % [rowCount, Time.now - startTime])
    end
  end

  class Console < Formatter
  end

  class XHTMLFormatter < Formatter
    # http://www.germane-software.com/software/rexml/doc/classes/REXML/Document.html
    # http://www.crummy.com/writing/RubyCookbook/test_results/75942.html
  end

  class JSON < Formatter
  end

  # Do a bit of testing.
  if $0 == __FILE__
    formatter = Console.new(:output_stream => STDOUT)
    now = Time.now
    formatter.header(['a', 'b'])
    formatter.row(['a', 'b'])
    formatter.row(['xxxxxxxxx xxxxxxxxxxx xxxxxxxxxxx xxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxx xxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxxxx'])
    formatter.row(['yyyyyy yyyyyy yyyyy yyy', 'xxxxxxxxx xxxxxxxxxxx xxxxxxxxxxx xxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxx xxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxxxx xxx xx x xx xxx xx xx xx x xx x x xxx x x xxx x x xx x x x x x x xx '])
    formatter.row(["NAME => 'table1', FAMILIES => [{NAME => 'fam2', VERSIONS => 3, COMPRESSION => 'NONE', IN_MEMORY => false, BLOCKCACHE => false, LENGTH => 2147483647, TTL => FOREVER, BLOOMFILTER => NONE}, {NAME => 'fam1', VERSIONS => 3, COMPRESSION => 'NONE', IN_MEMORY => false, BLOCKCACHE => false, LENGTH => 2147483647, TTL => FOREVER, BLOOMFILTER => NONE}]"])
    formatter.footer(now)
  end
end

View File

@ -1,676 +0,0 @@
# HBase ruby classes.
# Has wrapper classes for org.apache.hadoop.hbase.client.HBaseAdmin
# and for org.apache.hadoop.hbase.client.HTable. Classes take
# Formatters on construction and outputs any results using
# Formatter methods. These classes are only really for use by
# the hirb.rb HBase Shell script; they don't make much sense elsewhere.
# For example, the exists method on Admin class prints to the formatter
# whether the table exists and returns nil regardless.
include Java
include_class('java.lang.Integer') {|package,name| "J#{name}" }
include_class('java.lang.Long') {|package,name| "J#{name}" }
include_class('java.lang.Boolean') {|package,name| "J#{name}" }
import org.apache.hadoop.hbase.KeyValue
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.io.hfile.Compression
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.HColumnDescriptor
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.util.Writables
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.zookeeper.ZooKeeper
import org.apache.zookeeper.ZooKeeperMain
module HBase
COLUMN = "COLUMN"
COLUMNS = "COLUMNS"
TIMESTAMP = "TIMESTAMP"
NAME = HConstants::NAME
VERSIONS = HConstants::VERSIONS
IN_MEMORY = HConstants::IN_MEMORY
STOPROW = "STOPROW"
STARTROW = "STARTROW"
ENDROW = STOPROW
LIMIT = "LIMIT"
METHOD = "METHOD"
MAXLENGTH = "MAXLENGTH"
CACHE_BLOCKS = "CACHE_BLOCKS"
REPLICATION_SCOPE = "REPLICATION_SCOPE"
# Wrapper for org.apache.hadoop.hbase.client.HBaseAdmin
class Admin
# Build the admin wrapper.
# configuration:: passed straight to HBaseAdmin.new
# formatter::     receives the header/row/footer output of every command
def initialize(configuration, formatter)
  @admin = HBaseAdmin.new(configuration)
  # Keep the ZooKeeper wrapper (used by zk_dump) and a ZooKeeperMain bound
  # to the same ZooKeeper handle (used by zk).
  @zkWrapper = @admin.getConnection().getZooKeeperWrapper()
  @zkMain = ZooKeeperMain.new(@zkWrapper.getZooKeeper())
  @formatter = formatter
end
# Print the name of every table returned by the admin, one per row.
def list
  started = Time.now
  @formatter.header
  @admin.listTables().each do |descriptor|
    @formatter.row([descriptor.getNameAsString()])
  end
  @formatter.footer(started)
end
# Print the descriptor of the named table together with its enabled state.
# The catalog descriptors (META and ROOT) are searched as well as the
# tables returned by listTables.
# Raises ArgumentError when no descriptor carries the given name.
def describe(tableName)
  started = Time.now
  @formatter.header(["DESCRIPTION", "ENABLED"], [64])
  candidates = @admin.listTables().to_a
  candidates.push(HTableDescriptor::META_TABLEDESC, HTableDescriptor::ROOT_TABLEDESC)
  matched = false
  candidates.each do |descriptor|
    next unless descriptor.getNameAsString() == tableName
    @formatter.row([descriptor.to_s, "%s" % [@admin.isTableEnabled(tableName)]], true, [64])
    matched = true
  end
  raise ArgumentError.new("Failed to find table named " + tableName) unless matched
  @formatter.footer(started)
end
# Print whether the named table exists (as a single row holding the
# result of tableExists converted to a string).
def exists(tableName)
  started = Time.now
  @formatter.header
  @formatter.row([@admin.tableExists(tableName).to_s])
  @formatter.footer(started)
end
# Ask the admin to flush the given table or region, then print timing.
def flush(tableNameOrRegionName)
  started = Time.now
  @formatter.header
  @admin.flush(tableNameOrRegionName)
  @formatter.footer(started)
end
# Ask the admin to compact the given table or region, then print timing.
def compact(tableNameOrRegionName)
  started = Time.now
  @formatter.header
  @admin.compact(tableNameOrRegionName)
  @formatter.footer(started)
end
# Ask the admin to major-compact the given table or region, then print timing.
def major_compact(tableNameOrRegionName)
  started = Time.now
  @formatter.header
  @admin.majorCompact(tableNameOrRegionName)
  @formatter.footer(started)
end
# Ask the admin to split the given table or region, then print timing.
def split(tableNameOrRegionName)
  started = Time.now
  @formatter.header
  @admin.split(tableNameOrRegionName)
  @formatter.footer(started)
end
# Enable the named table, then print timing.
# TODO: Need an isEnabled method
def enable(tableName)
  started = Time.now
  @admin.enableTable(tableName)
  @formatter.header
  @formatter.footer(started)
end
# Disable the named table, then print timing.
# TODO: Need an isDisabled method
def disable(tableName)
  started = Time.now
  @admin.disableTable(tableName)
  @formatter.header
  @formatter.footer(started)
end
# Enable a single region: delegates to online with false, which is the
# value online hands to setOffline on the region's info.
def enable_region(regionName)
online(regionName, false)
end
# Disable a single region: delegates to online with true, which is the
# value online hands to setOffline on the region's info.
def disable_region(regionName)
online(regionName, true)
end
# Rewrite the offline flag stored for a region in the META table.
# regionName:: row key of the region in META
# onOrOff::    value passed to setOffline, so true marks the region
#              offline and false marks it online
def online(regionName, onOrOff)
  started = Time.now
  meta = HTable.new(HConstants::META_TABLE_NAME)
  regionKey = Bytes.toBytes(regionName)
  # Read the serialized region info currently stored for the region.
  lookup = Get.new(regionKey)
  lookup.addColumn(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER)
  hri = Writables.getWritable(meta.get(lookup).value(), HRegionInfo.new())
  hri.setOffline(onOrOff)
  # Write the modified region info back to the same cell.
  update = Put.new(regionKey)
  update.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
  meta.put(update)
  @formatter.header
  @formatter.footer(started)
end
# Delete the named table, then flush and major-compact the META table.
# Raises IOError when the table is still enabled.
def drop(tableName)
  started = Time.now
  @formatter.header
  if @admin.isTableEnabled(tableName)
    raise IOError.new("Table " + tableName + " is enabled. Disable it first")
  end
  @admin.deleteTable(tableName)
  flush(HConstants::META_TABLE_NAME)
  major_compact(HConstants::META_TABLE_NAME)
  @formatter.footer(started)
end
# Disable, drop and recreate the named table, reporting progress with puts.
def truncate(tableName)
  started = Time.now
  @formatter.header
  # Capture the descriptor before the table is dropped so it can be
  # handed to createTable afterwards.
  descriptor = HTable.new(tableName).getTableDescriptor()
  puts 'Truncating ' + tableName + '; it may take a while'
  puts 'Disabling table...'
  disable(tableName)
  puts 'Dropping table...'
  drop(tableName)
  puts 'Creating table...'
  @admin.createTable(descriptor)
  @formatter.footer(started)
end
# Create a table.
# tableName:: must be a String, otherwise TypeError is raised
# args::      array of column family specifications; each entry is either
#             a family name (String) or a Hash understood by hcd. Any
#             other entry type raises TypeError.
# For now all entries are presumed to be column family specifications.
# TODO: Add table options handling.
def create(tableName, args)
  started = Time.now
  unless tableName.instance_of? String
    raise TypeError.new("Table name must be of type String")
  end
  htd = HTableDescriptor.new(tableName)
  args.each do |spec|
    family =
      if spec.instance_of? String
        HColumnDescriptor.new(spec)
      elsif spec.instance_of? Hash
        hcd(spec)
      else
        raise TypeError.new(spec.class.to_s + " of " + spec.to_s + " is not of Hash type")
      end
    htd.addFamily(family)
  end
  @admin.createTable(htd)
  @formatter.header
  @formatter.footer(started)
end
# Alter a table.
# tableName:: must be a String, otherwise TypeError is raised
# args::      Hash; its METHOD entry (removed from the Hash here) selects
#             the action:
#             "delete"    - delete the column family named by args[NAME]
#             "table_att" - apply whichever of MAX_FILESIZE, READONLY,
#                           MEMSTORE_FLUSHSIZE and DEFERRED_LOG_FLUSH are
#                           set, then modify the table
#             otherwise   - build a column descriptor with hcd and modify
#                           the family if the table has it, else add it
def alter(tableName, args)
  started = Time.now
  unless tableName.instance_of? String
    raise TypeError.new("Table name must be of type String")
  end
  htd = @admin.getTableDescriptor(tableName.to_java_bytes)
  action = args.delete(METHOD)
  if action == "delete"
    @admin.deleteColumn(tableName, args[NAME])
  elsif action == "table_att"
    htd.setMaxFileSize(JLong.valueOf(args[MAX_FILESIZE])) if args[MAX_FILESIZE]
    htd.setReadOnly(JBoolean.valueOf(args[READONLY])) if args[READONLY]
    htd.setMemStoreFlushSize(JLong.valueOf(args[MEMSTORE_FLUSHSIZE])) if args[MEMSTORE_FLUSHSIZE]
    htd.setDeferredLogFlush(JBoolean.valueOf(args[DEFERRED_LOG_FLUSH])) if args[DEFERRED_LOG_FLUSH]
    @admin.modifyTable(tableName.to_java_bytes, htd)
  else
    descriptor = hcd(args)
    if htd.hasFamily(descriptor.getNameAsString().to_java_bytes)
      @admin.modifyColumn(tableName, descriptor.getNameAsString(), descriptor)
    else
      @admin.addColumn(tableName, descriptor)
    end
  end
  @formatter.header
  @formatter.footer(started)
end
# Close a region. When a server is given it is wrapped in a one-element
# Java array for closeRegion; otherwise nil is passed.
def close_region(regionName, server)
  started = Time.now
  target = server ? [server].to_java : nil
  @admin.closeRegion(regionName, target)
  @formatter.header
  @formatter.footer(started)
end
# Delegate to HBaseAdmin#shutdown. Nothing is written to the formatter.
def shutdown()
@admin.shutdown()
end
# Print the cluster status with puts (the formatter is not used).
# format:: "detailed" lists regions in transition, every live server with
#          its load and per-region load, and the dead servers;
#          "simple" lists live servers with their load, the dead servers,
#          and the summed request and region counts;
#          anything else prints a one-line summary.
def status(format)
  cluster = @admin.getClusterStatus()
  if format == "detailed"
    puts("version %s" % [ cluster.getHBaseVersion() ])
    # Put regions in transition first because usually empty
    puts("%d regionsInTransition" % cluster.getRegionsInTransition().size())
    cluster.getRegionsInTransition().each do |k, v|
      puts(" %s" % [v])
    end
    puts("%d live servers" % [ cluster.getServers() ])
    cluster.getServerInfo().each do |server|
      address = [ server.getServerAddress().getHostname(),
                  server.getServerAddress().getPort(), server.getStartCode() ]
      puts(" %s:%d %d" % address)
      puts(" %s" % [ server.getLoad().toString() ])
      server.getLoad().getRegionsLoad().each do |region|
        puts(" %s" % [ region.getNameAsString() ])
        puts(" %s" % [ region.toString() ])
      end
    end
    puts("%d dead servers" % [ cluster.getDeadServers() ])
    cluster.getDeadServerNames().each do |server|
      puts(" %s" % [ server ])
    end
  elsif format == "simple"
    requests = 0
    regionTotal = 0
    puts("%d live servers" % [ cluster.getServers() ])
    cluster.getServerInfo().each do |server|
      address = [ server.getServerAddress().getHostname(),
                  server.getServerAddress().getPort(), server.getStartCode() ]
      puts(" %s:%d %d" % address)
      puts(" %s" % [ server.getLoad().toString() ])
      requests += server.getLoad().getNumberOfRequests()
      regionTotal += server.getLoad().getNumberOfRegions()
    end
    puts("%d dead servers" % [ cluster.getDeadServers() ])
    cluster.getDeadServerNames().each do |server|
      puts(" %s" % [ server ])
    end
    puts("Aggregate load: %d, regions: %d" % [ requests , regionTotal ] )
  else
    summary = [ cluster.getServers(), cluster.getDeadServers(),
                cluster.getAverageLoad() ]
    puts("%d servers, %d dead, %.4f average load" % summary)
  end
end
# Return a new HColumnDescriptor made of passed args.
#
# arg:: Hash describing the column family. NAME is required; VERSIONS,
#       COMPRESSION, IN_MEMORY, BLOCKCACHE, BLOCKSIZE, TTL, BLOOMFILTER
#       and REPLICATION_SCOPE are optional and fall back to the
#       HColumnDescriptor::DEFAULT_* constants.
#
# Raises ArgumentError when the Hash has no NAME entry.
#
# TODO: This is brittle code.
# Here is current HCD constructor:
# public HColumnDescriptor(final byte [] familyName, final int maxVersions,
# final String compression, final boolean inMemory,
# final boolean blockCacheEnabled, final int blocksize,
# final int timeToLive, final boolean bloomFilter, final int scope) {
def hcd(arg)
  name = arg[NAME]
  # Interpolate the Hash: String + Hash raises TypeError, which would hide
  # the intended ArgumentError.
  raise ArgumentError.new("Column family #{arg.inspect} must have a name") \
    unless name
  # TODO: What encoding are Strings in jruby?
  return HColumnDescriptor.new(name.to_java_bytes,
    # JRuby uses longs for ints. Need to convert. Also constants are String
    arg[VERSIONS] ? JInteger.new(arg[VERSIONS]) : HColumnDescriptor::DEFAULT_VERSIONS,
    arg[HColumnDescriptor::COMPRESSION] ? arg[HColumnDescriptor::COMPRESSION] : HColumnDescriptor::DEFAULT_COMPRESSION,
    arg[IN_MEMORY] ? JBoolean.valueOf(arg[IN_MEMORY]) : HColumnDescriptor::DEFAULT_IN_MEMORY,
    arg[HColumnDescriptor::BLOCKCACHE] ? JBoolean.valueOf(arg[HColumnDescriptor::BLOCKCACHE]) : HColumnDescriptor::DEFAULT_BLOCKCACHE,
    arg[HColumnDescriptor::BLOCKSIZE] ? JInteger.valueOf(arg[HColumnDescriptor::BLOCKSIZE]) : HColumnDescriptor::DEFAULT_BLOCKSIZE,
    arg[HColumnDescriptor::TTL] ? JInteger.new(arg[HColumnDescriptor::TTL]) : HColumnDescriptor::DEFAULT_TTL,
    arg[HColumnDescriptor::BLOOMFILTER] ? JBoolean.valueOf(arg[HColumnDescriptor::BLOOMFILTER]) : HColumnDescriptor::DEFAULT_BLOOMFILTER,
    # Test and read the value under the same key.
    arg[HColumnDescriptor::REPLICATION_SCOPE] ? JInteger.new(arg[HColumnDescriptor::REPLICATION_SCOPE]) : HColumnDescriptor::DEFAULT_REPLICATION_SCOPE)
end
# Run a ZooKeeper command line built by joining args with spaces.
# An empty command line is replaced by 'help'.
def zk(args)
  command = args.join(' ')
  command = 'help' if command.empty?
  @zkMain.executeLine(command)
end
# Print (with puts) whatever the ZooKeeper wrapper's dump returns.
def zk_dump
puts @zkWrapper.dump
end
end
# Wrapper for org.apache.hadoop.hbase.client.HTable
class Table
# Open the named table with the given configuration and remember the
# formatter that receives the output of every command.
def initialize(configuration, tableName, formatter)
  @table, @formatter = HTable.new(configuration, tableName), formatter
end
# Delete a cell.
# column::    'family' or 'family:qualifier'; parsed with KeyValue.parseColumn
# timestamp:: defaults to HConstants::LATEST_TIMESTAMP
def delete(row, column, timestamp = HConstants::LATEST_TIMESTAMP)
  started = Time.now
  removal = Delete.new(row.to_java_bytes, timestamp, nil)
  parts = KeyValue.parseColumn(column.to_java_bytes)
  qualifier = parts.length > 1 ? parts[1] : nil
  removal.deleteColumn(parts[0], qualifier, timestamp)
  @table.delete(removal)
  @formatter.header
  @formatter.footer(started)
end
# Delete a whole row or, when a column is given, that column via
# deleteColumns.
# column::    optional 'family' or 'family:qualifier'
# timestamp:: defaults to HConstants::LATEST_TIMESTAMP
def deleteall(row, column = nil, timestamp = HConstants::LATEST_TIMESTAMP)
  started = Time.now
  removal = Delete.new(row.to_java_bytes, timestamp, nil)
  unless column == nil
    parts = KeyValue.parseColumn(column.to_java_bytes)
    qualifier = parts.length > 1 ? parts[1] : nil
    removal.deleteColumns(parts[0], qualifier, timestamp)
  end
  @table.delete(removal)
  @formatter.header
  @formatter.footer(started)
end
# Return an array with the name of every column family of the table,
# each followed by ':'.
def getAllColumns
  columns = []
  @table.getTableDescriptor().getFamilies().each do |family|
    columns << (family.getNameAsString() << ':')
  end
  columns
end
# Scan the table and print one formatter row per cell.
#
# args:: optional Hash of scanner specifications. Recognised keys:
#        "LIMIT" (maximum number of table rows; default -1 = no limit),
#        "MAXLENGTH" (truncate each printed cell; default -1 = no limit),
#        "FILTER", "STARTROW", "STOPROW", "TIMESTAMP",
#        "COLUMNS" (String or Array; default all families),
#        "CACHE_BLOCKS" (default true) and "VERSIONS" (default 1).
#
# Raises ArgumentError when COLUMNS is neither a String nor an Array.
def scan(args = {})
  now = Time.now
  limit = -1
  maxlength = -1
  if args != nil and args.length > 0
    limit = args["LIMIT"] || -1
    maxlength = args["MAXLENGTH"] || -1
    filter = args["FILTER"] || nil
    startrow = args["STARTROW"] || ""
    stoprow = args["STOPROW"] || nil
    timestamp = args["TIMESTAMP"] || nil
    columns = args["COLUMNS"] || getAllColumns()
    # `args["CACHE_BLOCKS"] || true` would turn an explicit false back into
    # true, so only fall back to true when the key is absent.
    cache = args["CACHE_BLOCKS"].nil? ? true : args["CACHE_BLOCKS"]
    versions = args["VERSIONS"] || 1
    if columns.class == String
      columns = [columns]
    elsif columns.class != Array
      raise ArgumentError.new("COLUMNS must be specified as a String or an Array")
    end
    if stoprow
      scan = Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes)
    else
      scan = Scan.new(startrow.to_java_bytes)
    end
    columns.each { |c| scan.addColumns(c) }
    scan.setFilter(filter) if filter != nil
    scan.setTimeStamp(timestamp) if timestamp != nil
    scan.setCacheBlocks(cache)
    scan.setMaxVersions(versions) if versions > 1
  else
    scan = Scan.new()
  end
  s = @table.getScanner(scan)
  count = 0
  begin
    @formatter.header(["ROW", "COLUMN+CELL"])
    i = s.iterator()
    while i.hasNext()
      r = i.next()
      row = Bytes::toStringBinary(r.getRow())
      break if limit != -1 and count >= limit
      for kv in r.list
        family = String.from_java_bytes kv.getFamily()
        qualifier = Bytes::toStringBinary(kv.getQualifier())
        column = family + ':' + qualifier
        cell = toString(column, kv, maxlength)
        @formatter.row([row, "column=%s, %s" % [column, cell]])
      end
      count += 1
    end
  ensure
    # Release the scanner even when LIMIT stops the loop early or an
    # error interrupts it.
    s.close()
  end
  @formatter.footer(now, count)
end
# Store a cell value.
# column::    'family' or 'family:qualifier'; without a qualifier, nil is
#             passed as the qualifier
# timestamp:: optional; when given it is passed along to Put#add
def put(row, column, value, timestamp = nil)
  started = Time.now
  mutation = Put.new(row.to_java_bytes)
  parts = KeyValue.parseColumn(column.to_java_bytes)
  qualifier = parts.length > 1 ? parts[1] : nil
  if timestamp
    mutation.add(parts[0], qualifier, timestamp, value.to_java_bytes)
  else
    mutation.add(parts[0], qualifier, value.to_java_bytes)
  end
  @table.put(mutation)
  @formatter.header
  @formatter.footer(started)
end
# Increment a cell through incrementColumnValue.
# column:: 'family' or 'family:qualifier'
# value::  amount to add; 1 is used when nil
def incr(row, column, value = nil)
  started = Time.now
  parts = KeyValue.parseColumn(column.to_java_bytes)
  family = parts[0]
  qualifier = parts.length > 1 ? parts[1] : nil
  value = 1 if value == nil
  @table.incrementColumnValue(row.to_java_bytes, family, qualifier, value)
  @formatter.header
  @formatter.footer(started)
end
# True when the wrapped table's name equals the META or the ROOT table name.
def isMetaTable()
  name = @table.getTableName()
  return true if Bytes.equals(name, HConstants::META_TABLE_NAME)
  Bytes.equals(name, HConstants::ROOT_TABLE_NAME)
end
# Make a String of the passed kv.
# For the catalog tables, cells whose format is known are decoded:
# 'info:regioninfo' as a region info and 'info:serverstartcode' as a long.
# Decoded cells are returned whole; every other cell is rendered with
# Bytes::toStringBinary and cut to maxlength characters unless maxlength
# is -1.
def toString(column, kv, maxlength)
  if isMetaTable()
    if column == 'info:regioninfo'
      hri = Writables.getHRegionInfoOrNull(kv.getValue())
      return "timestamp=%d, value=%s" % [kv.getTimestamp(), hri.toString()]
    end
    if column == 'info:serverstartcode'
      return "timestamp=%d, value=%s" % [kv.getTimestamp(), Bytes.toLong(kv.getValue())]
    end
  end
  text = "timestamp=" + kv.getTimestamp().to_s + ", value=" + Bytes::toStringBinary(kv.getValue())
  return text if maxlength == -1
  text[0, maxlength]
end
# Get from table and print one formatter row per returned cell.
#
# row::  row key
# args:: optional Hash. COLUMN or COLUMNS selects a single column (String)
#        or several (Array), each 'family' or 'family:qualifier';
#        TIMESTAMP restricts the get to one timestamp; VERSIONS sets the
#        maximum number of versions (default 1) when columns are given;
#        MAXLENGTH truncates each printed cell.
#
# Raises ArgumentError when the Hash carries neither columns nor a
# TIMESTAMP, or when the columns value is neither a String nor an Array.
def get(row, args = {})
  now = Time.now
  # A nil args is treated as "no options" all the way through, including
  # the MAXLENGTH lookup further down.
  args ||= {}
  if args.length == 0 or (args.length == 1 and args[MAXLENGTH] != nil)
    get = Get.new(row.to_java_bytes)
  else
    # Its a hash.
    columns = args[COLUMN]
    # Maybe they used the COLUMNS key
    columns = args[COLUMNS] if columns == nil
    if columns == nil
      # May have passed TIMESTAMP and row only; wants all columns from ts.
      ts = args[TIMESTAMP]
      raise ArgumentError, "Failed parse of #{args}, #{args.class}" if not ts
      # Apply the timestamp the same way the columns branch does.
      get = Get.new(row.to_java_bytes)
      get.setTimeStamp(ts)
    else
      get = Get.new(row.to_java_bytes)
      # Columns are non-nil
      if columns.class == String
        # Single column
        columns = [columns]
      elsif columns.class != Array
        # Interpolate: String + Hash + Class would raise TypeError.
        raise ArgumentError.new("Failed parse column argument type #{args.inspect}, #{args.class}")
      end
      columns.each do |column|
        # Parse each entry, not the enclosing array.
        split = KeyValue.parseColumn(column.to_java_bytes)
        if split.length > 1
          get.addColumn(split[0], split[1])
        else
          get.addFamily(split[0])
        end
      end
      get.setMaxVersions(args[VERSIONS] ? args[VERSIONS] : 1)
      get.setTimeStamp(args[TIMESTAMP]) if args[TIMESTAMP]
    end
  end
  result = @table.get(get)
  # Print out results. Result can be Cell or RowResult.
  maxlength = args[MAXLENGTH] || -1
  @formatter.header(["COLUMN", "CELL"])
  if !result.isEmpty()
    for kv in result.list()
      family = String.from_java_bytes kv.getFamily()
      qualifier = Bytes::toStringBinary(kv.getQualifier())
      column = family + ':' + qualifier
      @formatter.row([column, toString(column, kv, maxlength)])
    end
  end
  @formatter.footer(now)
end
# Count the rows of the table by scanning it with a FirstKeyOnlyFilter.
# interval:: a progress row is printed every +interval+ rows
# The total is reported through the formatter footer.
def count(interval = 1000)
  now = Time.now
  scan = Scan.new()
  scan.setCacheBlocks(false)
  # We can safely set scanner caching with the first key only filter
  scan.setCaching(10)
  scan.setFilter(FirstKeyOnlyFilter.new())
  s = @table.getScanner(scan)
  count = 0
  begin
    i = s.iterator()
    @formatter.header()
    while i.hasNext()
      r = i.next()
      count += 1
      if count % interval == 0
        @formatter.row(["Current count: " + count.to_s + ", row: " + \
          (String.from_java_bytes r.getRow())])
      end
    end
  ensure
    # Release the scanner whether the scan completed or raised.
    s.close()
  end
  @formatter.footer(now, count)
end
end
# Testing. To run this test, there needs to be an hbase cluster up and
# running. Then do: ${HBASE_HOME}/bin/hbase org.jruby.Main bin/HBase.rb
if $0 == __FILE__
  # Add this directory to LOAD_PATH; presumption is that Formatter module
  # sits beside this one. Then load it up.
  $LOAD_PATH.unshift File.dirname($PROGRAM_NAME)
  require 'Formatter'
  # Make a console formatter. The constructor merges its argument into an
  # options Hash, so the stream has to be passed under :output_stream.
  formatter = Formatter::Console.new(:output_stream => STDOUT)
  # Now add in java and hbase classes
  configuration = HBaseConfiguration.new()
  admin = Admin.new(configuration, formatter)
  # Drop old table. If it does not exist, get an exception. Catch and
  # continue
  TESTTABLE = "HBase_rb_testtable"
  begin
    admin.disable(TESTTABLE)
    admin.drop(TESTTABLE)
  rescue org.apache.hadoop.hbase.TableNotFoundException
    # Just suppress not found exception
  end
  admin.create(TESTTABLE, [{NAME => 'x', VERSIONS => 5}])
  # Presume it exists. If it doesn't, next items will fail.
  table = Table.new(configuration, TESTTABLE, formatter)
  for i in 1..10
    table.put('x%d' % i, 'x:%d' % i, 'x%d' % i)
  end
  table.get('x1', {COLUMNS => 'x:1'})
  if formatter.rowCount() != 1
    raise IOError.new("Failed first put")
  end
  table.scan({COLUMNS => ['x:']})
  if formatter.rowCount() != 10
    raise IOError.new("Failed scan of expected 10 rows")
  end
  # Verify that limit works. scan stops once LIMIT rows have been printed,
  # so the limit has to match the 3 rows this check expects.
  table.scan({COLUMNS => ['x:'], LIMIT => 3})
  if formatter.rowCount() != 3
    raise IOError.new("Failed scan of expected 3 rows")
  end
  # Should only be two rows if we start at 8 (Row x10 sorts beside x1).
  table.scan({COLUMNS => ['x:'], STARTROW => 'x8', LIMIT => 3})
  if formatter.rowCount() != 2
    raise IOError.new("Failed scan of expected 2 rows")
  end
  # Scan between two rows
  table.scan({COLUMNS => ['x:'], STARTROW => 'x5', ENDROW => 'x8'})
  if formatter.rowCount() != 3
    raise IOError.new("Failed endrow test")
  end
  # Verify that incr works
  # NOTE(review): the table above is created with family 'x' only, while
  # this increments a cell in family 'c' -- confirm this is intended.
  table.incr('incr1', 'c:1');
  table.scan({COLUMNS => ['c:1']})
  if formatter.rowCount() != 1
    raise IOError.new("Failed incr test")
  end
  # Verify that delete works
  table.delete('x1', 'x:1');
  table.scan({COLUMNS => ['x:1']})
  scan1 = formatter.rowCount()
  table.scan({COLUMNS => ['x:']})
  scan2 = formatter.rowCount()
  if scan1 != 0 or scan2 != 9
    raise IOError.new("Failed delete test")
  end
  # Verify that deleteall works
  table.put('x2', 'x:1', 'x:1')
  table.deleteall('x2')
  table.scan({COLUMNS => ['x:2']})
  scan1 = formatter.rowCount()
  table.scan({COLUMNS => ['x:']})
  scan2 = formatter.rowCount()
  if scan1 != 0 or scan2 != 8
    raise IOError.new("Failed deleteall test")
  end
  admin.disable(TESTTABLE)
  admin.drop(TESTTABLE)
end
end

View File

@ -18,12 +18,17 @@ include Java
# Some goodies for hirb. Should these be left up to the user's discretion?
require 'irb/completion'
# Add the $HBASE_HOME/bin directory, the location of this script, to the ruby
# load path so I can load up my HBase ruby modules
$LOAD_PATH.unshift File.dirname($PROGRAM_NAME)
# Require formatter
require 'Formatter'
# Add the $HBASE_HOME/lib/ruby OR $HBASE_HOME/core/src/main/ruby/lib directory
# to the ruby load path so I can load up my HBase ruby modules
if File.exists?(File.join(File.dirname(__FILE__), "..", "lib", "ruby", "hbase.rb"))
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib", "ruby")
else
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "core", "src", "main", "ruby")
end
#
# FIXME: Switch args processing to getopt
#
# See if there are args for this shell. If any, read and then strip from ARGV
# so they don't go through to irb. Output shell 'usage' if user types '--help'
cmdline_help = <<HERE # HERE document output as shell usage
@ -36,7 +41,7 @@ found = []
format = 'console'
format_width = 110
script2run = nil
logLevel = org.apache.log4j.Level::ERROR
log_level = org.apache.log4j.Level::ERROR
for arg in ARGV
if arg =~ /^--format=(.+)/i
format = $1
@ -55,9 +60,9 @@ for arg in ARGV
puts cmdline_help
exit
elsif arg == '-d' || arg == '--debug'
logLevel = org.apache.log4j.Level::DEBUG
log_level = org.apache.log4j.Level::DEBUG
$fullBackTrace = true
puts "Setting DEBUG log level..."
puts "Setting DEBUG log level..."
else
# Presume it a script. Save it off for running later below
# after we've set up some environment.
@ -67,439 +72,68 @@ for arg in ARGV
break
end
end
for arg in found
ARGV.delete(arg)
end
# Delete all processed args
found.each { |arg| ARGV.delete(arg) }
# Set logging level to avoid verboseness
org.apache.log4j.Logger.getLogger("org.apache.zookeeper").setLevel(log_level)
org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase").setLevel(log_level)
# Require HBase now after setting log levels
require 'hbase'
# Load hbase shell
require 'shell'
# Require formatter
require 'shell/formatter'
# Presume console format.
# Formatter takes an :output_stream parameter, if you don't want STDOUT.
@formatter = Formatter::Console.new(:format_width => format_width)
# TODO, etc. @formatter = Formatter::XHTML.new(STDOUT)
# Set logging level to avoid verboseness
logger = org.apache.log4j.Logger.getLogger("org.apache.zookeeper")
logger.setLevel(logLevel);
logger = org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase")
logger.setLevel(logLevel);
# Require HBase now after setting log levels
require 'HBase'
@formatter = Shell::Formatter::Console.new(:format_width => format_width)
# Setup the HBase module. Create a configuration.
# Turn off retries in hbase and ipc. Human doesn't want to wait on N retries.
@configuration = org.apache.hadoop.hbase.HBaseConfiguration.new()
@configuration.setInt("hbase.client.retries.number", 7)
@configuration.setInt("ipc.client.connect.max.retries", 3)
@hbase = Hbase::Hbase.new
# Do lazy create of admin because if we are pointed at bad master, it will hang
# shell on startup trying to connect.
@admin = nil
# Setup console
@shell = Shell::Shell.new(@hbase, @formatter)
# Promote hbase constants to be constants of this module so can
# be used bare as keys in 'create', 'alter', etc. To see constants
# in IRB, type 'Object.constants'. Don't promote defaults because
# flattens all types to String. Can be confusing.
def promoteConstants(constants)
# The constants to import are all in uppercase
for c in constants
if c == c.upcase
eval("%s = \"%s\"" % [c, c]) unless c =~ /DEFAULT_.*/
end
end
# Add commands to this namespace
@shell.export_commands(self)
# Add help command
def help(command = nil)
@shell.help(command)
end
promoteConstants(org.apache.hadoop.hbase.HColumnDescriptor.constants)
promoteConstants(org.apache.hadoop.hbase.HTableDescriptor.constants)
promoteConstants(HBase.constants)
# Start of the hbase shell commands.
# General shell methods
# Backwards compatibility method
def tools
# Help for hbase shell surgery tools
h = <<HERE
HBASE SURGERY TOOLS:
close_region Close a single region. Optionally specify regionserver.
Examples:
hbase> close_region 'REGIONNAME'
hbase> close_region 'REGIONNAME', 'REGIONSERVER_IP:PORT'
compact Compact all regions in passed table or pass a region row
to compact an individual region
disable_region Disable a single region
enable_region Enable a single region. For example:
hbase> enable_region 'REGIONNAME'
flush Flush all regions in passed table or pass a region row to
flush an individual region. For example:
hbase> flush 'TABLENAME'
hbase> flush 'REGIONNAME'
major_compact Run major compaction on passed table or pass a region row
to major compact an individual region
split Split table or pass a region row to split individual region
zk Low level ZooKeeper surgery tools. Type "zk 'help'" for more
information (Yes, you must quote 'help').
zk_dump Dump status of HBase cluster as seen by ZooKeeper.
Above commands are for 'experts'-only as misuse can damage an install
HERE
puts h
end
def help
# Output help. Help used to be a dictionary of name to short and long
# descriptions emitted using Formatters but awkward getting it to show
# nicely on console; instead use a HERE document. Means we can't
# output help other than on console but not an issue at the moment.
# TODO: Add help to the commands themselves rather than keep it distinct
h = <<HERE
HBASE SHELL COMMANDS:
alter Alter column family schema; pass table name and a dictionary
specifying new column family schema. Dictionaries are described
below in the GENERAL NOTES section. Dictionary must include name
of column family to alter. For example,
To change or add the 'f1' column family in table 't1' from defaults
to instead keep a maximum of 5 cell VERSIONS, do:
hbase> alter 't1', {NAME => 'f1', VERSIONS => 5}
To delete the 'f1' column family in table 't1', do:
hbase> alter 't1', {NAME => 'f1', METHOD => 'delete'}
You can also change table-scope attributes like MAX_FILESIZE
MEMSTORE_FLUSHSIZE, READONLY, and DEFERRED_LOG_FLUSH.
For example, to change the max size of a family to 128MB, do:
hbase> alter 't1', {METHOD => 'table_att', MAX_FILESIZE => '134217728'}
count Count the number of rows in a table. This operation may take a LONG
time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a
counting mapreduce job). Current count is shown every 1000 rows by
default. Count interval may be optionally specified. Examples:
hbase> count 't1'
hbase> count 't1', 100000
create Create table; pass table name, a dictionary of specifications per
column family, and optionally a dictionary of table configuration.
Dictionaries are described below in the GENERAL NOTES section.
Examples:
hbase> create 't1', {NAME => 'f1', VERSIONS => 5}
hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
hbase> # The above in shorthand would be the following:
hbase> create 't1', 'f1', 'f2', 'f3'
hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, \\
BLOCKCACHE => true}
describe Describe the named table: e.g. "hbase> describe 't1'"
delete Put a delete cell value at specified table/row/column and optionally
timestamp coordinates. Deletes must match the deleted cell's
coordinates exactly. When scanning, a delete cell suppresses older
versions. Takes arguments like the 'put' command described below
deleteall Delete all cells in a given row; pass a table name, row, and optionally
a column and timestamp
disable Disable the named table: e.g. "hbase> disable 't1'"
drop Drop the named table. Table must first be disabled. If table has
more than one region, run a major compaction on .META.:
hbase> major_compact ".META."
enable Enable the named table
exists Does the named table exist? e.g. "hbase> exists 't1'"
exit Type "hbase> exit" to leave the HBase Shell
get Get row or cell contents; pass table name, row, and optionally
a dictionary of column(s), timestamp and versions. Examples:
hbase> get 't1', 'r1'
hbase> get 't1', 'r1', {COLUMN => 'c1'}
hbase> get 't1', 'r1', {COLUMN => ['c1', 'c2', 'c3']}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1, \\
VERSIONS => 4}
incr Increments a cell 'value' at specified table/row/column coordinates.
To increment a cell value in table 't1' at row 'r1' under column
'c1' by 1 (can be omitted) or 10 do:
hbase> incr 't1', 'r1', 'c1'
hbase> incr 't1', 'r1', 'c1', 1
hbase> incr 't1', 'r1', 'c1', 10
list List all tables in hbase
put Put a cell 'value' at specified table/row/column and optionally
timestamp coordinates. To put a cell value into table 't1' at
row 'r1' under column 'c1' marked with the time 'ts1', do:
hbase> put 't1', 'r1', 'c1', 'value', ts1
tools Listing of hbase surgery tools
scan Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of
the following: LIMIT, STARTROW, STOPROW, TIMESTAMP, or COLUMNS. If
no columns are specified, all columns will be scanned. To scan all
members of a column family, leave the qualifier empty as in
'col_family:'. Examples:
hbase> scan '.META.'
hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, \\
STARTROW => 'xyz'}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}
status Show cluster status. Can be 'summary', 'simple', or 'detailed'. The
default is 'summary'. Examples:
hbase> status
hbase> status 'simple'
hbase> status 'summary'
hbase> status 'detailed'
shutdown Shut down the cluster.
truncate Disables, drops and recreates the specified table.
version Output this HBase version
GENERAL NOTES:
Quote all names in the hbase shell such as table and column names. Don't
forget commas delimit command parameters. Type <RETURN> after entering a
command to run it. Dictionaries of configuration used in the creation and
alteration of tables are ruby Hashes. They look like this:
{'key1' => 'value1', 'key2' => 'value2', ...}
They are opened and closed with curley-braces. Key/values are delimited by
the '=>' character combination. Usually keys are predefined constants such as
NAME, VERSIONS, COMPRESSION, etc. Constants do not need to be quoted. Type
'Object.constants' to see a (messy) list of all constants in the environment.
In case you are using binary keys or values and need to enter them into the
shell then use double-quotes to make use of hexadecimal for example:
hbase> get 't1', "key\\x03\\x3f\\xcd"
hbase> get 't1', "key\\003\\023\\011"
hbase> put 't1', "test\\xef\\xff", 'f1:', "\\x01\\x33\\x40"
Using the double-quote notation you can directly use the values output by the
shell for example during a "scan" call.
This HBase shell is the JRuby IRB with the above HBase-specific commands added.
For more on the HBase Shell, see http://wiki.apache.org/hadoop/Hbase/Shell
HERE
puts h
end
# Print the HBase version, source revision and build date, as reported by
# org.apache.hadoop.hbase.util.VersionInfo, with a single puts call.
def version
# Output version.
# The trailing backslashes continue the double-quoted string onto the next
# source line, so the three values are emitted as one line of output.
puts "Version: #{org.apache.hadoop.hbase.util.VersionInfo.getVersion()},\
r#{org.apache.hadoop.hbase.util.VersionInfo.getRevision()},\
#{org.apache.hadoop.hbase.util.VersionInfo.getDate()}"
end
# Shell command: forward a cluster shutdown request to the admin wrapper.
def shutdown
  admin.shutdown
end
# DDL
# Lazily build the HBase::Admin wrapper from @configuration and @formatter,
# then keep returning that same instance on later calls.
def admin()
  @admin ||= HBase::Admin.new(@configuration, @formatter)
end
# Build an HBase::Table wrapper for the named table.
# Nothing is cached: every call constructs a new wrapper.
def table(table)
  wrapper = HBase::Table.new(@configuration, table, @formatter)
  wrapper
end
# Shell command: create a table. The collected column specs are handed to
# the admin wrapper as a single array argument.
def create(table, *args)
  admin.create(table, args)
end
# Shell command: drop the named table via the admin wrapper.
def drop(table)
  admin.drop(table)
end
# Shell command: alter a table; args is passed through to the admin wrapper unchanged.
def alter(table, args)
  admin.alter(table, args)
end
# Administration
# Shell command: list tables via the admin wrapper.
def list
  admin.list
end
# Shell command: describe the named table via the admin wrapper.
def describe(table)
  admin.describe(table)
end
# Shell command: enable the named table via the admin wrapper.
def enable(table)
  admin.enable(table)
end
# Shell command: disable the named table via the admin wrapper.
def disable(table)
  admin.disable(table)
end
# Shell command: enable a single region, identified by its region name.
def enable_region(regionName)
  admin.enable_region(regionName)
end
# Shell command: disable a single region, identified by its region name.
def disable_region(regionName)
  admin.disable_region(regionName)
end
# Shell command: ask the admin wrapper whether the named table exists.
def exists(table)
  admin.exists(table)
end
# Shell command: truncate the named table via the admin wrapper.
def truncate(table)
  admin.truncate(table)
end
# Shell command: close a region; the server argument is optional and is
# forwarded as nil when omitted.
def close_region(regionName, server = nil)
  admin.close_region(regionName, server)
end
# Shell command: show cluster status in the requested format
# ('summary' when no format is given).
def status(format = 'summary')
  admin.status(format)
end
# Shell command: pass the collected words to the admin wrapper's zk helper
# as one array.
def zk(*args)
  admin.zk(args)
end
# Shell command: fetch the ZooKeeper dump from the admin wrapper.
def zk_dump
  admin.zk_dump()
end
# CRUD
# Shell command: read a row from the named table; args is forwarded as-is.
def get(table, row, args = {})
  wrapper = table(table)
  wrapper.get(row, args)
end
# Shell command: write one cell; timestamp is optional and forwarded as nil
# when omitted.
def put(table, row, column, value, timestamp = nil)
  wrapper = table(table)
  wrapper.put(row, column, value, timestamp)
end
# Shell command: increment a counter cell; value is optional and forwarded
# as nil when omitted.
def incr(table, row, column, value = nil)
  wrapper = table(table)
  wrapper.incr(row, column, value)
end
# Shell command: scan the named table with the given options hash.
def scan(table, args = {})
  wrapper = table(table)
  wrapper.scan(args)
end
# Shell command: delete a cell. The timestamp defaults to
# HConstants::LATEST_TIMESTAMP.
def delete(table, row, column,
           timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
  wrapper = table(table)
  wrapper.delete(row, column, timestamp)
end
# Shell command: delete a whole row, or one column of it when column is
# given. The timestamp defaults to HConstants::LATEST_TIMESTAMP.
def deleteall(table, row, column = nil,
              timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
  wrapper = table(table)
  wrapper.deleteall(row, column, timestamp)
end
# Shell command: count the rows of the named table; interval is forwarded
# to the table wrapper.
def count(table, interval = 1000)
  wrapper = table(table)
  wrapper.count(interval)
end
# Shell command: request a flush of the given table or region.
def flush(tableNameOrRegionName)
  admin.flush(tableNameOrRegionName)
end
# Shell command: request a compaction of the given table or region.
def compact(tableNameOrRegionName)
  admin.compact(tableNameOrRegionName)
end
# Shell command: request a major compaction of the given table or region.
def major_compact(tableNameOrRegionName)
  admin.major_compact(tableNameOrRegionName)
end
# Ask the admin wrapper to split the given table or region, then print the
# help text of the 'tools' command group through @shell.
# NOTE(review): the help_group call looks like a line from a neighbouring
# diff hunk interleaved into this method -- confirm against the real file.
def split(tableNameOrRegionName)
admin().split(tableNameOrRegionName)
@shell.help_group('tools')
end
# Include hbase constants
include HBaseConstants
# If script2run, try running it. Will go on to run the shell unless
# script calls 'exit' or 'exit 0' or 'exit errcode'.
load(script2run) if script2run
# Output a banner message that tells users where to go for help
puts <<HERE
HBase Shell; enter 'help<RETURN>' for list of supported commands.
HERE
# Print the version line right after the banner.
version
# NOTE(review): print_banner presumably supersedes the puts/version pair
# above in the new revision (both appear here because this view interleaves
# removed and added diff lines) -- confirm.
@shell.print_banner
require "irb"
require 'irb/hirb'
module IRB
# Subclass of IRB so can intercept methods
# Subclass of Irb that silences stdout while Irb is being constructed and
# hides nil results from the interactive output.
class HIRB < Irb
  def initialize
    # This is ugly. Our 'help' method above provokes the following message
    # on irb construction: 'irb: warn: can't alias help from irb_help.'
    # Below, we reset the output so its pointed at /dev/null during irb
    # construction just so this message does not come out after we emit
    # the banner. Other attempts at playing with the hash of methods
    # down in IRB didn't seem to work. I think the worst thing that can
    # happen is the shell exiting because of failed IRB construction with
    # no error (though we're not blanking STDERR)
    f = nil
    begin
      f = File.open("/dev/null", "w")
      $stdout = f
      super
    ensure
      # Put stdout back before closing the sink. f stays nil when the open
      # itself failed; guarding the close keeps that original error visible
      # instead of replacing it with a NoMethodError raised from here.
      $stdout = STDOUT
      f.close() if f
    end
  end

  # Print the value of the last evaluated expression unless it is nil.
  def output_value
    # Suppress output if last_value is 'nil'
    # Otherwise, when user types help, get ugly 'nil'
    # after all output.
    if @context.last_value != nil
      super
    end
  end
end
def IRB.start(ap_path = nil)
def self.start(ap_path = nil)
$0 = File::basename(ap_path, ".rb") if ap_path
IRB.setup(ap_path)
@CONF[:IRB_NAME] = 'hbase'
@CONF[:AP_NAME] = 'hbase'
@CONF[:BACK_TRACE_LIMIT] = 0 unless $fullBackTrace
if @CONF[:SCRIPT]
hirb = HIRB.new(nil, @CONF[:SCRIPT])
else

View File

@ -0,0 +1,52 @@
# HBase ruby classes.
# Has wrapper classes for org.apache.hadoop.hbase.client.HBaseAdmin
# and for org.apache.hadoop.hbase.client.HTable. Classes take
# Formatters on construction and outputs any results using
# Formatter methods. These classes are only really for use by
# the hirb.rb HBase Shell script; they don't make much sense elsewhere.
# For example, the exists method on Admin class prints to the formatter
# whether the table exists and returns nil regardless.
include Java
java_import org.apache.hadoop.hbase.HConstants
java_import org.apache.hadoop.hbase.HColumnDescriptor
java_import org.apache.hadoop.hbase.HTableDescriptor
include_class('java.lang.Integer') {|package,name| "J#{name}" }
include_class('java.lang.Long') {|package,name| "J#{name}" }
include_class('java.lang.Boolean') {|package,name| "J#{name}" }
# Names usable unquoted in the shell (e.g. NAME, VERSIONS, LIMIT).
# Each constant evaluates to the string key expected by the wrapper classes.
module HBaseConstants
  COLUMN = "COLUMN"
  COLUMNS = "COLUMNS"
  TIMESTAMP = "TIMESTAMP"
  NAME = HConstants::NAME
  VERSIONS = HConstants::VERSIONS
  IN_MEMORY = HConstants::IN_MEMORY
  STOPROW = "STOPROW"
  STARTROW = "STARTROW"
  ENDROW = STOPROW
  LIMIT = "LIMIT"
  METHOD = "METHOD"
  MAXLENGTH = "MAXLENGTH"
  CACHE_BLOCKS = "CACHE_BLOCKS"
  REPLICATION_SCOPE = "REPLICATION_SCOPE"

  # Load constants from hbase java API.
  #
  # For every all-uppercase name in +constants+ that does not contain
  # DEFAULT_ and is not defined yet, define a constant of that name whose
  # value is the name itself. Uses const_defined?/const_set rather than
  # building Ruby source for eval.
  def self.promote_constants(constants)
    constants.each do |c|
      # Module#constants yields Strings or Symbols depending on the Ruby version
      name = c.to_s
      # The constants to import are all in uppercase
      next if name =~ /DEFAULT_.*/ || name != name.upcase
      next if const_defined?(name)
      const_set(name, name)
    end
  end

  promote_constants(HColumnDescriptor.constants)
  promote_constants(HTableDescriptor.constants)
end
# Include classes definition
require 'hbase/hbase'
require 'hbase/admin'
require 'hbase/table'

View File

@ -0,0 +1,351 @@
include Java
java_import org.apache.hadoop.hbase.client.HBaseAdmin
java_import org.apache.zookeeper.ZooKeeperMain
java_import org.apache.hadoop.hbase.HColumnDescriptor
java_import org.apache.hadoop.hbase.HTableDescriptor
java_import org.apache.hadoop.hbase.HRegionInfo
java_import org.apache.zookeeper.ZooKeeper
# Wrapper for org.apache.hadoop.hbase.client.HBaseAdmin
module Hbase
class Admin
include HBaseConstants
# Create the HBaseAdmin client for +configuration+, keep the connection's
# ZooKeeper wrapper, and build a ZooKeeperMain around its ZooKeeper handle
# (used by #zk). The formatter is stored in @formatter.
def initialize(configuration, formatter)
  @admin = HBaseAdmin.new(configuration)
  @zk_wrapper = @admin.getConnection().getZooKeeperWrapper()
  @zk_main = ZooKeeperMain.new(@zk_wrapper.getZooKeeper())
  @formatter = formatter
end
#----------------------------------------------------------------------------------------------
# Returns a list of tables in hbase
# Collects the name (getNameAsString) of every descriptor returned by
# HBaseAdmin#listTables into an array.
def list
  @admin.listTables.collect do |descriptor|
    descriptor.getNameAsString
  end
end
#----------------------------------------------------------------------------------------------
# Requests a table or region flush
# Delegates to HBaseAdmin#flush and returns whatever it returns.
def flush(table_or_region_name)
  @admin.flush(table_or_region_name)
end
#----------------------------------------------------------------------------------------------
# Requests a table or region compaction
# Delegates to HBaseAdmin#compact and returns whatever it returns.
def compact(table_or_region_name)
  @admin.compact(table_or_region_name)
end
#----------------------------------------------------------------------------------------------
# Requests a table or region major compaction
# Delegates to HBaseAdmin#majorCompact and returns whatever it returns.
def major_compact(table_or_region_name)
  @admin.majorCompact(table_or_region_name)
end
#----------------------------------------------------------------------------------------------
# Requests a table or region split
# Delegates to HBaseAdmin#split and returns whatever it returns.
def split(table_or_region_name)
  @admin.split(table_or_region_name)
end
#----------------------------------------------------------------------------------------------
# Enables a table
# No-op (returns nil) when the table is already enabled; otherwise calls
# HBaseAdmin#enableTable.
def enable(table_name)
  @admin.enableTable(table_name) unless enabled?(table_name)
end
#----------------------------------------------------------------------------------------------
# Disables a table
# No-op (returns nil) when the table is not enabled; otherwise calls
# HBaseAdmin#disableTable.
def disable(table_name)
  @admin.disableTable(table_name) if enabled?(table_name)
end
#----------------------------------------------------------------------------------------------
# Drops a table
# Deletes +table_name+, then flushes and major-compacts the META table.
#
# Raises ArgumentError when the table does not exist or is still enabled.
# (The messages previously ended with a stray single quote.)
def drop(table_name)
  raise ArgumentError, "Table #{table_name} does not exist." unless exists?(table_name)
  raise ArgumentError, "Table #{table_name} is enabled. Disable it first." if enabled?(table_name)

  @admin.deleteTable(table_name)
  flush(HConstants::META_TABLE_NAME)
  major_compact(HConstants::META_TABLE_NAME)
end
#----------------------------------------------------------------------------------------------
# Shuts hbase down
# Delegates to HBaseAdmin#shutdown.
def shutdown
  @admin.shutdown()
end
#----------------------------------------------------------------------------------------------
# Returns ZooKeeper status dump
# Returns the result of calling dump on the ZooKeeper wrapper obtained in
# the constructor.
def zk_dump
  @zk_wrapper.dump()
end
#----------------------------------------------------------------------------------------------
# Creates a table
# Builds an HTableDescriptor named +table_name+, adds one column family per
# remaining argument (nested arrays are flattened, nils dropped) and passes
# it to HBaseAdmin#createTable.
#
# Each family spec is a String (family name) or a Hash understood by #hcd.
# Raises ArgumentError for a non-String table name, for an empty family
# list, or for a spec that is neither String nor Hash.
def create(table_name, *args)
  raise(ArgumentError, "Table name must be of type String") unless table_name.kind_of?(String)

  families = args.flatten.compact
  raise(ArgumentError, "Table must have at least one column family") if families.empty?

  # TODO: add table options support
  htd = HTableDescriptor.new(table_name)
  families.each do |family|
    supported = family.kind_of?(String) || family.kind_of?(Hash)
    raise(ArgumentError, "#{family.class} of #{family.inspect} is not of Hash or String type") unless supported
    htd.addFamily(hcd(family))
  end

  @admin.createTable(htd)
end
#----------------------------------------------------------------------------------------------
# Closes a region
# Calls HBaseAdmin#closeRegion. When +server+ is given it is wrapped in a
# one-element Java array; otherwise nil is passed in its place.
def close_region(region_name, server = nil)
  server_args = server ? [server].to_java : nil
  @admin.closeRegion(region_name, server_args)
end
#----------------------------------------------------------------------------------------------
# Enables a region
# Marks the region as not offline (passes false to #online).
def enable_region(region_name)
  offline = false
  online(region_name, offline)
end
#----------------------------------------------------------------------------------------------
# Disables a region
# Marks the region as offline (passes true to #online).
def disable_region(region_name)
  offline = true
  online(region_name, offline)
end
#----------------------------------------------------------------------------------------------
# Returns table's structure description
# Looks +table_name+ up among the descriptors from HBaseAdmin#listTables
# followed by the META and ROOT catalog descriptors, and returns the first
# match rendered with to_s.
#
# Raises ArgumentError when no descriptor has that name.
def describe(table_name)
  catalog = [HTableDescriptor::META_TABLEDESC, HTableDescriptor::ROOT_TABLEDESC]
  candidates = @admin.listTables.to_a + catalog

  match = candidates.find { |descriptor| descriptor.getNameAsString == table_name }
  raise(ArgumentError, "Failed to find table named #{table_name}") unless match

  match.to_s
end
#----------------------------------------------------------------------------------------------
# Truncates table (deletes all records by recreating the table)
# Truncates a table by disabling it, dropping it and creating it again from
# the descriptor it had before.
#
# The descriptor is read through the HBaseAdmin built from this wrapper's
# configuration (the same call #alter uses) instead of opening a separate
# HTable with a default configuration that was never closed.
#
# When a block is given it is called with a progress message before each
# of the three steps.
def truncate(table_name)
  table_description = @admin.getTableDescriptor(table_name.to_java_bytes)

  yield 'Disabling table...' if block_given?
  disable(table_name)

  yield 'Dropping table...' if block_given?
  drop(table_name)

  yield 'Creating table...' if block_given?
  @admin.createTable(table_description)
end
#----------------------------------------------------------------------------------------------
# Change table structure or table options
# Applies one or more alterations to a disabled, existing table.
#
# Each argument is either a column family name (String) or a Hash:
# * no METHOD key: the hash is a column definition for #hcd; the family is
#   modified when the table already has it and added otherwise
# * METHOD => 'delete' (or the shortcut 'delete' => name): delete family NAME
# * METHOD => 'table_att': set the table attributes present in the hash
#
# Raises ArgumentError for a non-String table name, a missing or enabled
# table, an empty argument list, a delete without NAME, or an unknown METHOD.
def alter(table_name, *args)
  raise(ArgumentError, "Table name must be of type String") unless table_name.kind_of?(String)
  raise(ArgumentError, "Can't find a table: #{table_name}") unless exists?(table_name)
  raise(ArgumentError, "Table #{table_name} is enabled. Disable it first before altering.") if enabled?(table_name)
  raise(ArgumentError, "There should be at least one argument but the table name") if args.empty?

  htd = @admin.getTableDescriptor(table_name.to_java_bytes)

  args.each do |spec|
    # A bare string is shorthand for a column definition with only a name
    spec = { NAME => spec } if spec.kind_of?(String)
    # 'delete' => name is shorthand for METHOD => 'delete', NAME => name
    spec = { METHOD => 'delete', NAME => spec['delete'] } if spec['delete']

    method = spec.delete(METHOD)
    if !method
      # No method: treat the remaining keys as a column definition
      descriptor = hcd(spec)
      column_name = descriptor.getNameAsString
      if htd.hasFamily(column_name.to_java_bytes)
        @admin.modifyColumn(table_name, column_name, descriptor)
      else
        @admin.addColumn(table_name, descriptor)
      end
    elsif method == "delete"
      raise(ArgumentError, "NAME parameter missing for delete method") unless spec[NAME]
      @admin.deleteColumn(table_name, spec[NAME])
    elsif method == "table_att"
      htd.setMaxFileSize(JLong.valueOf(spec[MAX_FILESIZE])) if spec[MAX_FILESIZE]
      htd.setReadOnly(JBoolean.valueOf(spec[READONLY])) if spec[READONLY]
      htd.setMemStoreFlushSize(JLong.valueOf(spec[MEMSTORE_FLUSHSIZE])) if spec[MEMSTORE_FLUSHSIZE]
      htd.setDeferredLogFlush(JBoolean.valueOf(spec[DEFERRED_LOG_FLUSH])) if spec[DEFERRED_LOG_FLUSH]
      @admin.modifyTable(table_name.to_java_bytes, htd)
    else
      raise ArgumentError, "Unknown method: #{method}"
    end
  end
end
# Prints the cluster status obtained from HBaseAdmin#getClusterStatus.
#
# format "detailed": version, regions in transition, every live server with
#   its load and per-region load, then the dead servers
# format "simple":   every live server with its load, the dead servers and
#   the aggregated request and region counts
# anything else:     a one-line summary
def status(format)
  cluster = @admin.getClusterStatus()

  case format
  when "detailed"
    puts("version %s" % [ cluster.getHBaseVersion() ])
    # Put regions in transition first because usually empty
    puts("%d regionsInTransition" % cluster.getRegionsInTransition().size())
    cluster.getRegionsInTransition().each do |k, v|
      puts(" %s" % [v])
    end
    puts("%d live servers" % [ cluster.getServers() ])
    cluster.getServerInfo().each do |server|
      address = server.getServerAddress()
      puts(" %s:%d %d" % [ address.getHostname(), address.getPort(), server.getStartCode() ])
      puts(" %s" % [ server.getLoad().toString() ])
      server.getLoad().getRegionsLoad().each do |region|
        puts(" %s" % [ region.getNameAsString() ])
        puts(" %s" % [ region.toString() ])
      end
    end
    puts("%d dead servers" % [ cluster.getDeadServers() ])
    cluster.getDeadServerNames().each do |server|
      puts(" %s" % [ server ])
    end
  when "simple"
    total_requests = 0
    total_regions = 0
    puts("%d live servers" % [ cluster.getServers() ])
    cluster.getServerInfo().each do |server|
      address = server.getServerAddress()
      puts(" %s:%d %d" % [ address.getHostname(), address.getPort(), server.getStartCode() ])
      puts(" %s" % [ server.getLoad().toString() ])
      total_requests += server.getLoad().getNumberOfRequests()
      total_regions += server.getLoad().getNumberOfRegions()
    end
    puts("%d dead servers" % [ cluster.getDeadServers() ])
    cluster.getDeadServerNames().each do |server|
      puts(" %s" % [ server ])
    end
    puts("Aggregate load: %d, regions: %d" % [ total_requests, total_regions ])
  else
    puts "#{cluster.getServers} servers, #{cluster.getDeadServers} dead, #{'%.4f' % cluster.getAverageLoad} average load"
  end
end
#----------------------------------------------------------------------------------------------
#
# Helper methods
#
# Does table exist?
# Returns the result of HBaseAdmin#tableExists for +table_name+.
def exists?(table_name)
  @admin.tableExists(table_name)
end
#----------------------------------------------------------------------------------------------
# Is table enabled
# Returns the result of HBaseAdmin#isTableEnabled for +table_name+.
def enabled?(table_name)
  @admin.isTableEnabled(table_name)
end
#----------------------------------------------------------------------------------------------
# Return a new HColumnDescriptor made of passed args
# Return a new HColumnDescriptor made of passed args.
#
# +arg+ is either a family name (String) or a Hash that must contain NAME
# and may contain VERSIONS, COMPRESSION, IN_MEMORY, BLOCKCACHE, BLOCKSIZE,
# TTL, BLOOMFILTER and REPLICATION_SCOPE. An option falls back to the
# HColumnDescriptor default only when it is absent (nil), so an explicit
# false -- e.g. BLOCKCACHE => false -- is honoured rather than being
# replaced by the default.
#
# Raises ArgumentError when a Hash has no NAME.
def hcd(arg)
  # String arg, single parameter constructor
  return HColumnDescriptor.new(arg) if arg.kind_of?(String)

  # TODO: This is brittle code.
  # Here is current HCD constructor:
  # public HColumnDescriptor(final byte [] familyName, final int maxVersions,
  # final String compression, final boolean inMemory,
  # final boolean blockCacheEnabled, final int blocksize,
  # final int timeToLive, final boolean bloomFilter, final int scope) {
  name = arg[NAME]
  raise(ArgumentError, "Column family #{arg} must have a name") unless name

  # True when the caller supplied the option, even if its value is false
  given = lambda { |key| !arg[key].nil? }

  # TODO: What encoding are Strings in jruby?
  # JRuby uses longs for ints. Need to convert. Also constants are String
  HColumnDescriptor.new(name.to_java_bytes,
    given.call(VERSIONS) ? JInteger.new(arg[VERSIONS]) : HColumnDescriptor::DEFAULT_VERSIONS,
    arg[HColumnDescriptor::COMPRESSION] || HColumnDescriptor::DEFAULT_COMPRESSION,
    given.call(IN_MEMORY) ? JBoolean.valueOf(arg[IN_MEMORY]) : HColumnDescriptor::DEFAULT_IN_MEMORY,
    given.call(HColumnDescriptor::BLOCKCACHE) ? JBoolean.valueOf(arg[HColumnDescriptor::BLOCKCACHE]) : HColumnDescriptor::DEFAULT_BLOCKCACHE,
    given.call(HColumnDescriptor::BLOCKSIZE) ? JInteger.valueOf(arg[HColumnDescriptor::BLOCKSIZE]) : HColumnDescriptor::DEFAULT_BLOCKSIZE,
    given.call(HColumnDescriptor::TTL) ? JInteger.new(arg[HColumnDescriptor::TTL]) : HColumnDescriptor::DEFAULT_TTL,
    given.call(HColumnDescriptor::BLOOMFILTER) ? JBoolean.valueOf(arg[HColumnDescriptor::BLOOMFILTER]) : HColumnDescriptor::DEFAULT_BLOOMFILTER,
    given.call(HColumnDescriptor::REPLICATION_SCOPE) ? JInteger.new(arg[HColumnDescriptor::REPLICATION_SCOPE]) : HColumnDescriptor::DEFAULT_REPLICATION_SCOPE)
end
#----------------------------------------------------------------------------------------------
# Enables/disables a region by name
# Sets the offline flag of a region by rewriting its HRegionInfo in META.
#
# Reads the region's info:regioninfo cell, calls setOffline(on_off) on the
# decoded HRegionInfo and writes it back. Note the flag is "offline", so
# true takes the region offline and false brings it online.
#
# Raises ArgumentError when META has no regioninfo cell for +region_name+.
def online(region_name, on_off)
  # Open meta table
  meta = HTable.new(HConstants::META_TABLE_NAME)

  # Read region info
  region_bytes = Bytes.toBytes(region_name)
  g = Get.new(region_bytes)
  g.addColumn(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER)
  hri_bytes = meta.get(g).value
  # Fail with a readable error instead of handing nil to Writables
  raise(ArgumentError, "Can't find region: #{region_name}") if hri_bytes.nil?

  # Change region status
  hri = Writables.getWritable(hri_bytes, HRegionInfo.new)
  hri.setOffline(on_off)

  # Write it back
  put = Put.new(region_bytes)
  put.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
  meta.put(put)
end
#----------------------------------------------------------------------------------------------
# Invoke a ZooKeeper maintenance command
# Joins +args+ with spaces and runs the result as one ZooKeeperMain command
# line; an empty line is replaced by 'help'.
def zk(args)
  joined = args.join(' ')
  @zk_main.executeLine(joined.empty? ? 'help' : joined)
end
end
end

View File

@ -0,0 +1,33 @@
include Java
import org.apache.hadoop.hbase.HBaseConfiguration
require 'hbase/admin'
require 'hbase/table'
module Hbase
  # Entry point that owns the HBase configuration and hands out Admin and
  # Table wrappers built on it.
  class Hbase
    attr_accessor :configuration

    # Uses +config+ when given; otherwise creates an HBaseConfiguration and
    # caps the client and ipc retry counts on it.
    def initialize(config = nil)
      self.configuration = config || default_configuration
    end

    # New ::Hbase::Admin for this configuration.
    def admin(formatter)
      ::Hbase::Admin.new(configuration, formatter)
    end

    # Create new one each time
    def table(table, formatter)
      ::Hbase::Table.new(configuration, table, formatter)
    end

    private

    # Fresh configuration with reduced retries.
    def default_configuration
      conf = org.apache.hadoop.hbase.HBaseConfiguration.create
      # Turn off retries in hbase and ipc. Human doesn't want to wait on N retries.
      conf.setInt("hbase.client.retries.number", 7)
      conf.setInt("ipc.client.connect.max.retries", 3)
      conf
    end
  end
end

View File

@ -0,0 +1,290 @@
include Java
java_import org.apache.hadoop.hbase.client.HTable
java_import org.apache.hadoop.hbase.KeyValue
java_import org.apache.hadoop.hbase.util.Bytes
java_import org.apache.hadoop.hbase.util.Writables
java_import org.apache.hadoop.hbase.client.Put
java_import org.apache.hadoop.hbase.client.Get
java_import org.apache.hadoop.hbase.client.Delete
java_import org.apache.hadoop.hbase.client.Scan
java_import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter
# Wrapper for org.apache.hadoop.hbase.client.HTable
module Hbase
class Table
include HBaseConstants
# Opens an HTable for +table_name+ using +configuration+.
# The formatter argument is accepted but not used here.
def initialize(configuration, table_name, formatter)
  @table = HTable.new(configuration, table_name)
end
#----------------------------------------------------------------------------------------------
# Put a cell 'value' at specified table/row/column
# Writes +value+ (via to_s) into +column+ of +row+. The timestamp is passed
# to Put#add only when one is given.
def put(row, column, value, timestamp = nil)
  mutation = Put.new(row.to_s.to_java_bytes)
  family, qualifier = parse_column_name(column)
  payload = value.to_s.to_java_bytes

  if timestamp
    mutation.add(family, qualifier, timestamp, payload)
  else
    mutation.add(family, qualifier, payload)
  end

  @table.put(mutation)
end
#----------------------------------------------------------------------------------------------
# Delete a cell
# Same as #deleteall with a mandatory column.
def delete(row, column, timestamp = HConstants::LATEST_TIMESTAMP)
  deleteall(row, column, timestamp)
end
#----------------------------------------------------------------------------------------------
# Delete a row
# Deletes the whole row, or only +column+ of it when a column is given,
# using +timestamp+ for the Delete and for deleteColumns.
def deleteall(row, column = nil, timestamp = HConstants::LATEST_TIMESTAMP)
  deletion = Delete.new(row.to_s.to_java_bytes, timestamp, nil)

  if column
    parts = parse_column_name(column)
    deletion.deleteColumns(parts[0], parts[1], timestamp)
  end

  @table.delete(deletion)
end
#----------------------------------------------------------------------------------------------
# Increment a counter atomically
# Calls HTable#incrementColumnValue on +column+ of +row+; the amount is
# +value+, or 1 when no value is given.
def incr(row, column, value = nil)
  amount = value || 1
  family, qualifier = parse_column_name(column)
  @table.incrementColumnValue(row.to_s.to_java_bytes, family, qualifier, amount)
end
#----------------------------------------------------------------------------------------------
# Count rows in a table
# Counts the rows of the table and returns the total.
#
# Scans with a FirstKeyOnlyFilter, block caching off and scanner caching
# of 10. When a block is given it is called with (count, row key) every
# +interval+ rows. The scanner is always closed, even when iteration or
# the block raises.
def count(interval = 1000)
  # We can safely set scanner caching with the first key only filter
  scan = Scan.new
  scan.cache_blocks = false
  scan.caching = 10
  scan.setFilter(FirstKeyOnlyFilter.new)

  # Run the scanner
  scanner = @table.getScanner(scan)
  count = 0
  begin
    iter = scanner.iterator
    # Iterate results
    while iter.hasNext
      row = iter.next
      count += 1
      next unless (block_given? && count % interval == 0)
      # Allow command modules to visualize counting process
      yield(count, String.from_java_bytes(row.getRow))
    end
  ensure
    # Release the scanner instead of leaving it open
    scanner.close
  end

  # Return the counter
  count
end
#----------------------------------------------------------------------------------------------
# Get from table
# Reads one row.
#
# After +row+ the caller passes either an options Hash (COLUMN/COLUMNS,
# VERSIONS, TIMESTAMP, MAXLENGTH) or a bare list of column names. A Hash
# without columns must carry TIMESTAMP, otherwise ArgumentError is raised.
# MAXLENGTH is removed from the given Hash and handed to #to_string.
#
# Returns nil when the row is empty. Otherwise yields (column, value) per
# cell and returns nil when a block is given, or returns a Hash of
# column => value.
def get(row, *args)
  get = Get.new(row.to_s.to_java_bytes)
  maxlength = -1

  # Normalize args: a leading Hash is the options, anything else is a column list
  args = args.first if args.first.kind_of?(Hash)
  if args.kind_of?(String) || args.kind_of?(Array)
    args = { COLUMNS => [ args ].flatten.compact }
  end
  raise ArgumentError, "Failed parse of of #{args.inspect}, #{args.class}" unless args.kind_of?(Hash)

  # Get maxlength parameter if passed
  maxlength = args.delete(MAXLENGTH) if args[MAXLENGTH]

  requested = args.empty? ? nil : (args[COLUMN] || args[COLUMNS])
  if requested
    # Normalize types, convert string to an array of strings
    requested = [ requested ] if requested.is_a?(String)
    # At this point it is either an array or some unsupported stuff
    unless requested.kind_of?(Array)
      raise ArgumentError, "Failed parse column argument type #{args.inspect}, #{args.class}"
    end

    # A column with a qualifier selects one cell, a bare family selects all of it
    requested.each do |column|
      family, qualifier = parse_column_name(column.to_s)
      if qualifier
        get.addColumn(family, qualifier)
      else
        get.addFamily(family)
      end
    end

    # Additional params
    get.setMaxVersions(args[VERSIONS] || 1)
    get.setTimeStamp(args[TIMESTAMP]) if args[TIMESTAMP]
  elsif !args.empty?
    # May have passed TIMESTAMP and row only; wants all columns from ts.
    ts = args[TIMESTAMP]
    raise ArgumentError, "Failed parse of #{args.inspect}, #{args.class}" unless ts
    get.setTimeStamp(ts.to_i)
  end

  # Call hbase for the results
  result = @table.get(get)
  return nil if result.isEmpty

  cells = {}
  result.list.each do |kv|
    column = "#{String.from_java_bytes(kv.getFamily)}:#{Bytes::toStringBinary(kv.getQualifier)}"
    value = to_string(column, kv, maxlength)
    if block_given?
      yield(column, value)
    else
      cells[column] = value
    end
  end

  # If block given, we've yielded all the results, otherwise just return them
  block_given? ? nil : cells
end
#----------------------------------------------------------------------------------------------
# Scans whole table or a range of keys and returns rows matching specific criterias
# Scans whole table or a range of keys and returns rows matching specific criterias.
#
# +args+ is a Hash with the optional keys LIMIT, MAXLENGTH, FILTER,
# STARTROW, STOPROW, TIMESTAMP, COLUMNS/COLUMN, CACHE_BLOCKS and VERSIONS.
# The caller's hash is not modified.
#
# CACHE_BLOCKS defaults to true only when it is absent, so passing
# CACHE_BLOCKS => false really turns block caching off. The scanner is
# always closed, even when iteration or the block raises.
#
# With a block: yields (row key, "column=..., cell") per cell and returns
# the number of rows. Without: returns a Hash of row key => {column => cell}.
# Raises ArgumentError when +args+ is not a Hash or COLUMNS has a bad type.
def scan(args = {})
  unless args.kind_of?(Hash)
    raise ArgumentError, "Arguments should be a hash. Failed to parse #{args.inspect}, #{args.class}"
  end

  # Work on a copy so the deletes below do not alter the caller's hash
  args = args.dup
  limit = args.delete("LIMIT") || -1
  maxlength = args.delete("MAXLENGTH") || -1

  if args.any?
    filter = args["FILTER"]
    startrow = args["STARTROW"] || ''
    stoprow = args["STOPROW"]
    timestamp = args["TIMESTAMP"]
    columns = args["COLUMNS"] || args["COLUMN"] || get_all_columns
    # `value || true` can never yield false, so test for absence instead
    cache = args["CACHE_BLOCKS"]
    cache = true if cache.nil?
    versions = args["VERSIONS"] || 1

    # Normalize column names
    columns = [columns] if columns.class == String
    unless columns.kind_of?(Array)
      raise ArgumentError.new("COLUMNS must be specified as a String or an Array")
    end

    scan = if stoprow
      Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes)
    else
      Scan.new(startrow.to_java_bytes)
    end

    columns.each { |c| scan.addColumns(c) }
    scan.setFilter(filter) if filter
    scan.setTimeStamp(timestamp) if timestamp
    scan.setCacheBlocks(cache)
    scan.setMaxVersions(versions) if versions > 1
  else
    scan = Scan.new
  end

  # Start the scanner
  scanner = @table.getScanner(scan)
  count = 0
  res = {}
  begin
    iter = scanner.iterator

    # Iterate results
    while iter.hasNext
      break if limit > 0 && count >= limit

      row = iter.next
      key = Bytes::toStringBinary(row.getRow)

      row.list.each do |kv|
        family = String.from_java_bytes(kv.getFamily)
        qualifier = Bytes::toStringBinary(kv.getQualifier)

        column = "#{family}:#{qualifier}"
        cell = to_string(column, kv, maxlength)

        if block_given?
          yield(key, "column=#{column}, #{cell}")
        else
          res[key] ||= {}
          res[key][column] = cell
        end
      end

      # One more row processed
      count += 1
    end
  ensure
    # Release the scanner instead of leaving it open
    scanner.close
  end

  block_given? ? count : res
end
#----------------------------------------------------------------------------------------
# Helper methods
# Returns a list of column names in the table
# Builds "family:" for every family of the table descriptor.
def get_all_columns
  families = @table.table_descriptor.getFamilies
  families.collect { |family| "#{family.getNameAsString}:" }
end
# Checks if current table is one of the 'meta' tables
# True when the table's name equals the META or the ROOT table name
# (compared with Bytes.equals).
def is_meta_table?
  name = @table.table_name
  catalog_names = [HConstants::META_TABLE_NAME, HConstants::ROOT_TABLE_NAME]
  catalog_names.any? { |catalog| Bytes.equals(name, catalog) }
end
# Returns family and (when has it) qualifier for a column name
# Splits a column name with KeyValue.parseColumn and returns
# [family, qualifier]; qualifier is nil when the split has a single part.
def parse_column_name(column)
  parts = KeyValue.parseColumn(column.to_java_bytes)
  qualifier = parts.length > 1 ? parts[1] : nil
  return parts[0], qualifier
end
# Make a String of the passed kv
# Intercept cells whose format we know such as the info:regioninfo in .META.
# Renders a KeyValue as "timestamp=..., value=...".
#
# On the meta tables, info:regioninfo is decoded to an HRegionInfo and
# info:serverstartcode to a long; these two are returned untruncated. Any
# other cell is rendered with Bytes::toStringBinary and cut to +maxlength+
# characters unless maxlength is -1.
def to_string(column, kv, maxlength = -1)
  if is_meta_table?
    case column
    when 'info:regioninfo'
      hri = Writables.getHRegionInfoOrNull(kv.getValue)
      return "timestamp=%d, value=%s" % [kv.getTimestamp, hri.toString]
    when 'info:serverstartcode'
      return "timestamp=%d, value=%s" % [kv.getTimestamp, Bytes.toLong(kv.getValue)]
    end
  end

  text = "timestamp=#{kv.getTimestamp}, value=#{Bytes::toStringBinary(kv.getValue)}"
  maxlength == -1 ? text : text[0, maxlength]
end
end
end

View File

@ -0,0 +1,32 @@
module IRB
  # Subclass of IRB so can intercept methods
  class HIRB < Irb
    def initialize
      # This is ugly. Our 'help' method above provokes the following message
      # on irb construction: 'irb: warn: can't alias help from irb_help.'
      # Below, we reset the output so its pointed at /dev/null during irb
      # construction just so this message does not come out after we emit
      # the banner. Other attempts at playing with the hash of methods
      # down in IRB didn't seem to work. I think the worst thing that can
      # happen is the shell exiting because of failed IRB construction with
      # no error (though we're not blanking STDERR)
      f = nil
      begin
        f = File.open("/dev/null", "w")
        $stdout = f
        super
      ensure
        # Put stdout back before closing the sink. f stays nil when the open
        # itself failed; guarding the close keeps that original error visible
        # instead of replacing it with a NoMethodError raised from here.
        $stdout = STDOUT
        f.close() if f
      end
    end

    # Print the value of the last evaluated expression unless it is nil.
    def output_value
      # Suppress output if last_value is 'nil'
      # Otherwise, when user types help, get ugly 'nil'
      # after all output.
      if @context.last_value != nil
        super
      end
    end
  end
end

238
core/src/main/ruby/shell.rb Normal file
View File

@ -0,0 +1,238 @@
# Shell commands module
module Shell
# Registry of loaded command classes, keyed by command name.
@@commands = Hash.new

# Accessor for the command registry.
def self.commands
  return @@commands
end
# Registry of command groups, keyed by group name.
@@command_groups = Hash.new

# Accessor for the group registry.
def self.command_groups
  return @@command_groups
end
# Registers command +name+ in +group+ and loads its class from
# shell/commands/<name>.rb; the class name is the camelized command name
# looked up under Commands.
#
# Does nothing when the command is already loaded. Raises ArgumentError for
# an unknown group and RuntimeError when the command cannot be loaded.
# LoadError is rescued explicitly: a bare rescue only covers StandardError,
# so a missing command file used to escape without the explanatory message.
def self.load_command(name, group)
  return if commands[name]

  # Register command in the group
  raise ArgumentError, "Unknown group: #{group}" unless command_groups[group]
  command_groups[group][:commands] << name

  # Load command
  begin
    require "shell/commands/#{name}"
    klass_name = name.to_s.gsub(/(?:^|_)(.)/) { $1.upcase } # camelize
    # Constant lookup instead of evaluating a generated source string
    commands[name] = Commands.const_get(klass_name)
  rescue StandardError, LoadError => e
    raise "Can't load hbase shell command: #{name}. Error: #{e}\n#{e.backtrace.join("\n")}"
  end
end
# Registers a command group and loads every command listed in
# opts[:commands].
#
# opts[:full_name] defaults to the group name; opts[:comment] is optional.
# Raises ArgumentError when opts has no :commands.
def self.load_command_group(group, opts)
  command_names = opts[:commands]
  raise ArgumentError, "No :commands for group #{group}" unless command_names

  command_groups[group] = {
    :commands => [],
    :command_names => command_names,
    :full_name => opts[:full_name] || group,
    :comment => opts[:comment]
  }

  command_names.each { |command| load_command(command, group) }
end
#----------------------------------------------------------------------
class Shell
attr_accessor :hbase
attr_accessor :formatter
# Stores the hbase entry point and the formatter through their accessors.
def initialize(hbase, formatter)
  self.hbase, self.formatter = hbase, formatter
end
# Admin wrapper built from hbase and the formatter on first use, then reused.
def hbase_admin
  @hbase_admin = hbase.admin(formatter) unless @hbase_admin
  @hbase_admin
end
# Table wrapper for +name+, obtained from hbase on every call.
def hbase_table(name)
  hbase.table(name, formatter)
end
# Defines, on the object +where+, one method per registered command. Each
# generated method forwards its arguments to @shell.command, so +where+ is
# expected to hold the shell in its @shell instance variable.
def export_commands(where)
  ::Shell.commands.keys.each do |cmd|
    source = "def #{cmd}(*args)\n@shell.command('#{cmd}', *args)\nend\n"
    where.instance_eval(source)
  end
end
# Instantiates the registered class for +command+ (looked up by its string
# name), passing this shell to the constructor.
def command_instance(command)
  klass = ::Shell.commands[command.to_s]
  klass.new(self)
end
# Runs +command+ with +args+ through the command's command_safe.
def command(command, *args)
  instance = command_instance(command)
  instance.command_safe(*args)
end
# Prints the two banner lines, then runs the 'version' command.
def print_banner
  puts "HBase Shell; enter 'help<RETURN>' for list of supported commands.",
       'Type "exit<RETURN>" to leave the HBase Shell'
  command('version')
end
# Prints the command name, its help text and a blank line. Returns nil.
def help_command(command)
  puts "COMMAND: #{command}"
  help_text = command_instance(command).help
  puts help_text
  puts
  nil
end
# Prints the group's full name, a rule, the help of each of its commands in
# sorted order and, when present, the group comment. Returns nil.
def help_group(group_name)
  group = ::Shell.command_groups[group_name.to_s]
  rule = '-' * 80

  puts group[:full_name]
  puts rule
  group[:commands].sort.each { |cmd| help_command(cmd) }

  comment = group[:comment]
  if comment
    puts rule
    puts
    puts comment
    puts
  end

  nil
end
# General help entry point. Always returns nil.
#
# With a known command or group name, prints that command's or group's help.
# With an unknown name, prints an error followed by the header and the group
# list. With no argument, prints the header, the group list and the footer.
def help(command = nil)
  rule = '-' * 80
  puts

  if command
    key = command.to_s
    return help_command(command) if ::Shell.commands[key]
    return help_group(command) if ::Shell.command_groups[key]
    puts "ERROR: Invalid command or command group name: #{command}"
    puts
  end

  puts help_header
  puts
  puts rule
  puts
  puts "Here is the list of groups with their commands:"
  puts

  ::Shell.command_groups.each do |name, group|
    puts " " + group[:full_name] + ": "
    puts " group name: " + name
    puts " commands: " + group[:command_names].sort.join(', ')
    puts
  end
  puts

  # The general notes are only shown for a plain `help`
  if command.nil? || command == false
    puts rule
    puts
    help_footer
    puts
  end

  nil
end
# Introductory text shown at the top of the general help output.
def help_header
  [
    "There are few groups of commands in HBase",
    "",
    "Use help 'group_name' (e.g. help 'general') to get help on all commands in a group",
    "Use help 'command' (e.g. help 'get') to get help on a specific command"
  ].join("\n")
end
# Prints the "GENERAL NOTES" section of the help output directly to stdout
# (unlike help_header, which returns its text): quoting rules, the Hash
# syntax used for configuration dictionaries, and how to enter binary keys
# or values with double-quoted escapes.
def help_footer
puts "GENERAL NOTES:"
# The heredoc body is printed as written; the doubled backslashes render as
# single backslashes in the output.
puts <<-HERE
Quote all names in the hbase shell such as table and column names. Don't
forget commas delimit command parameters. Type <RETURN> after entering a
command to run it. Dictionaries of configuration used in the creation
and alteration of tables are ruby Hashes. They look like this:
{'key1' => 'value1', 'key2' => 'value2', ...}
They are opened and closed with curley-braces. Key/values are delimited
by the '=>' character combination. Usually keys are predefined constants
such as NAME, VERSIONS, COMPRESSION, etc. Constants do not need to be
quoted. Type 'Object.constants' to see a (messy) list of all constants
in the environment.
In case you are using binary keys or values and need to enter them into
the shell then use double-quotes to make use of hexadecimal for example:
hbase> get 't1', "key\\x03\\x3f\\xcd"
hbase> get 't1', "key\\003\\023\\011"
hbase> put 't1', "test\\xef\\xff", 'f1:', "\\x01\\x33\\x40"
Using the double-quote notation you can directly use the values output by
the shell for example during a "scan" call.
This HBase shell is the JRuby IRB with the above HBase-specific commands
added. For more on the HBase Shell, see http://wiki.apache.org/hadoop/Hbase/Shell
HERE
end
end
end
# Load commands base class
require 'shell/commands'
# Load all commands
# Register the built-in command groups. Each call also loads the listed
# command files from shell/commands/.
Shell.load_command_group(
  'general',
  :full_name => 'GENERAL HBASE SHELL COMMANDS',
  :commands => %w[status version]
)

Shell.load_command_group(
  'ddl',
  :full_name => 'TABLES MANAGEMENT COMMANDS',
  :commands => %w[alter create describe disable drop enable exists list]
)

Shell.load_command_group(
  'dml',
  :full_name => 'DATA MANIPULATION COMMANDS',
  :commands => %w[count delete deleteall get incr put scan truncate]
)

Shell.load_command_group(
  'tools',
  :full_name => 'HBASE SURGERY TOOLS',
  :comment => "WARNING: Above commands are for 'experts'-only as misuse can damage an install",
  :commands => %w[
    close_region compact disable_region enable_region flush
    major_compact shutdown split zk zk_dump
  ]
)

View File

@ -0,0 +1,45 @@
module Shell
  module Commands
    # Base class of all shell commands. Subclasses provide #command (the
    # action) and #help (the usage text).
    class Command
      attr_accessor :shell

      def initialize(shell)
        self.shell = shell
      end

      # Runs #command with +args+ and always returns nil on completion.
      #
      # An ArgumentError is reported to the user together with the command's
      # help text. Any other exception propagates to the caller; it used to
      # be discarded silently because of a `return` inside `ensure`.
      def command_safe(*args)
        command(*args)
        nil
      rescue ArgumentError => e
        puts
        puts "ERROR: #{e}"
        puts
        puts "Here is some help for this command:"
        puts help
        puts
        nil
      end

      # Shared admin wrapper of the owning shell.
      def admin
        shell.hbase_admin
      end

      # Table wrapper for +name+ from the owning shell.
      def table(name)
        shell.hbase_table(name)
      end

      #----------------------------------------------------------------------

      # Formatter of the owning shell.
      def formatter
        shell.formatter
      end

      # Runs the given block, then emits the formatter header and a footer
      # that receives the time taken before the block started.
      def format_simple_command
        now = Time.now
        yield
        formatter.header
        formatter.footer(now)
      end
    end
  end
end

View File

@ -0,0 +1,38 @@
module Shell
  module Commands
    # Shell command that forwards its arguments to +admin.alter+.
    class Alter < Command
      # Usage text for the command.
      def help
        <<-EOF
Alter column family schema; pass table name and a dictionary
specifying new column family schema. Dictionaries are described
on the main help command output. Dictionary must include name
of column family to alter. For example,
To change or add the 'f1' column family in table 't1' from defaults
to instead keep a maximum of 5 cell VERSIONS, do:
hbase> alter 't1', NAME => 'f1', VERSIONS => 5
To delete the 'f1' column family in table 't1', do:
hbase> alter 't1', NAME => 'f1', METHOD => 'delete'
or a shorter version:
hbase> alter 't1', 'delete' => 'f1'
You can also change table-scope attributes like MAX_FILESIZE
MEMSTORE_FLUSHSIZE, READONLY, and DEFERRED_LOG_FLUSH.
For example, to change the max size of a family to 128MB, do:
hbase> alter 't1', METHOD => 'table_att', MAX_FILESIZE => '134217728'
There could be more than one alteration in one command:
hbase> alter 't1', {NAME => 'f1'}, {NAME => 'f2', METHOD => 'delete'}
EOF
      end

      # Applies the alteration specs in +args+ to +table+.
      def command(table, *args)
        format_simple_command { admin.alter(table, *args) }
      end
    end
  end
end

View File

@ -0,0 +1,20 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.close_region+.
    class CloseRegion < Command
      # Usage text for the command.
      def help
        <<-EOF
Close a single region. Optionally specify regionserver.
Examples:
hbase> close_region 'REGIONNAME'
hbase> close_region 'REGIONNAME', 'REGIONSERVER_IP:PORT'
EOF
      end

      # +server+ is optional and passed through as nil when omitted.
      def command(region_name, server = nil)
        format_simple_command { admin.close_region(region_name, server) }
      end
    end
  end
end

View File

@ -0,0 +1,18 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.compact+.
    class Compact < Command
      # Usage text for the command.
      def help
        <<-EOF
Compact all regions in passed table or pass a region row
to compact an individual region
EOF
      end

      # Accepts either a table name or a region name.
      def command(table_or_region_name)
        format_simple_command { admin.compact(table_or_region_name) }
      end
    end
  end
end

View File

@ -0,0 +1,26 @@
module Shell
  module Commands
    # Shell command that counts rows through the table's +count+ method,
    # printing a progress row each time the table yields.
    class Count < Command
      # Usage text for the command.
      def help
        <<-EOF
Count the number of rows in a table. This operation may take a LONG
time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a
counting mapreduce job). Current count is shown every 1000 rows by
default. Count interval may be optionally specified. Examples:
hbase> count 't1'
hbase> count 't1', 100000
EOF
      end

      # Prints progress rows and finishes with a footer carrying the total.
      def command(table, interval = 1000)
        started_at = Time.now
        formatter.header
        total = table(table).count(interval) { |cnt, row| formatter.row([ "Current count: #{cnt}, row: #{row}" ]) }
        formatter.footer(started_at, total)
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
module Shell
  module Commands
    # Shell command that forwards its arguments to +admin.create+.
    class Create < Command
      # Usage text for the command.
      def help
        <<-EOF
Create table; pass table name, a dictionary of specifications per
column family, and optionally a dictionary of table configuration.
Dictionaries are described below in the GENERAL NOTES section.
Examples:
hbase> create 't1', {NAME => 'f1', VERSIONS => 5}
hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
hbase> # The above in shorthand would be the following:
hbase> create 't1', 'f1', 'f2', 'f3'
hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000,
BLOCKCACHE => true}
EOF
      end

      # Creates +table+ using the column specs given in +args+.
      def command(table, *args)
        format_simple_command { admin.create(table, *args) }
      end
    end
  end
end

View File

@ -0,0 +1,23 @@
module Shell
  module Commands
    # Shell command that forwards to the table's +delete+ method.
    class Delete < Command
      # Usage text for the command.
      def help
        <<-EOF
Put a delete cell value at specified table/row/column and optionally
timestamp coordinates. Deletes must match the deleted cell's
coordinates exactly. When scanning, a delete cell suppresses older
versions. To delete a cell from 't1' at row 'r1' under column 'c1'
marked with the time 'ts1', do:
hbase> delete 't1', 'r1', 'c1', ts1
EOF
      end

      # +timestamp+ defaults to HConstants::LATEST_TIMESTAMP.
      def command(table, row, column, timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
        format_simple_command { table(table).delete(row, column, timestamp) }
      end
    end
  end
end

View File

@ -0,0 +1,22 @@
module Shell
  module Commands
    # Shell command that forwards to the table's +deleteall+ method.
    class Deleteall < Command
      # Usage text for the command.
      def help
        <<-EOF
Delete all cells in a given row; pass a table name, row, and optionally
a column and timestamp. Examples:
hbase> deleteall 't1', 'r1'
hbase> deleteall 't1', 'r1', 'c1'
hbase> deleteall 't1', 'r1', 'c1', ts1
EOF
      end

      # +column+ defaults to nil, +timestamp+ to HConstants::LATEST_TIMESTAMP.
      def command(table, row, column = nil, timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
        format_simple_command { table(table).deleteall(row, column, timestamp) }
      end
    end
  end
end

View File

@ -0,0 +1,22 @@
module Shell
  module Commands
    # Shell command that prints a table's description next to its
    # enabled flag.
    class Describe < Command
      # Usage text for the command.
      def help
        <<-EOF
Describe the named table. For example:
hbase> describe 't1'
EOF
      end

      # Emits a two-column header and one row; the first column gets
      # a width value of 64.
      def command(table)
        started_at = Time.now
        description = admin.describe(table)
        formatter.header([ "DESCRIPTION", "ENABLED" ], [ 64 ])
        enabled_flag = admin.enabled?(table).to_s
        formatter.row([ description, enabled_flag ], true, [ 64 ])
        formatter.footer(started_at)
      end
    end
  end
end

View File

@ -0,0 +1,17 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.disable+.
    class Disable < Command
      # Usage text for the command.
      def help
        <<-EOF
Disable the named table: e.g. "hbase> disable 't1'"
EOF
      end

      # Disables +table+.
      def command(table)
        format_simple_command { admin.disable(table) }
      end
    end
  end
end

View File

@ -0,0 +1,19 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.disable_region+.
    class DisableRegion < Command
      # Usage text for the command.
      def help
        <<-EOF
Disable a single region. For example:
hbase> disable_region 'REGIONNAME'
EOF
      end

      # Disables the region called +region_name+.
      def command(region_name)
        format_simple_command { admin.disable_region(region_name) }
      end
    end
  end
end

View File

@ -0,0 +1,20 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.drop+.
    class Drop < Command
      # Usage text for the command.
      def help
        <<-EOF
Drop the named table. Table must first be disabled. If table has
more than one region, run a major compaction on .META.:
hbase> major_compact ".META."
EOF
      end

      # Drops +table+.
      def command(table)
        format_simple_command { admin.drop(table) }
      end
    end
  end
end

View File

@ -0,0 +1,17 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.enable+.
    class Enable < Command
      # Usage text for the command.
      def help
        <<-EOF
Enable the named table: e.g. "hbase> enable 't1'"
EOF
      end

      # Enables +table+.
      def command(table)
        format_simple_command { admin.enable(table) }
      end
    end
  end
end

View File

@ -0,0 +1,19 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.enable_region+.
    class EnableRegion < Command
      # Usage text for the command.
      def help
        <<-EOF
Enable a single region. For example:
hbase> enable_region 'REGIONNAME'
EOF
      end

      # Enables the region called +region_name+.
      def command(region_name)
        format_simple_command { admin.enable_region(region_name) }
      end
    end
  end
end

View File

@ -0,0 +1,19 @@
module Shell
  module Commands
    # Shell command that reports whether +admin.exists?+ finds the table.
    class Exists < Command
      # Usage text for the command.
      def help
        <<-EOF
Does the named table exist? e.g. "hbase> exists 't1'"
EOF
      end

      # Prints a single row stating whether +table+ exists.
      def command(table)
        format_simple_command do
          verdict = admin.exists?(table.to_s) ? "does exist" : "does not exist"
          formatter.row([ "Table #{table} " + verdict ])
        end
      end
    end
  end
end

View File

@ -0,0 +1,21 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.flush+.
    class Flush < Command
      # Usage text for the command.
      def help
        <<-EOF
Flush all regions in passed table or pass a region row to
flush an individual region. For example:
hbase> flush 'TABLENAME'
hbase> flush 'REGIONNAME'
EOF
      end

      # Accepts either a table name or a region name.
      def command(table_or_region_name)
        format_simple_command { admin.flush(table_or_region_name) }
      end
    end
  end
end

View File

@ -0,0 +1,32 @@
module Shell
  module Commands
    # Shell command that prints every (column, value) pair yielded by the
    # table's +get+ method.
    class Get < Command
      # Usage text for the command.
      def help
        <<-EOF
Get row or cell contents; pass table name, row, and optionally
a dictionary of column(s), timestamp and versions. Examples:
hbase> get 't1', 'r1'
hbase> get 't1', 'r1', {COLUMN => 'c1'}
hbase> get 't1', 'r1', {COLUMN => ['c1', 'c2', 'c3']}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1, VERSIONS => 4}
hbase> get 't1', 'r1', 'c1'
hbase> get 't1', 'r1', 'c1', 'c2'
hbase> get 't1', 'r1', ['c1', 'c2']
EOF
      end

      # Emits a COLUMN/CELL header, one row per yielded pair, then the footer.
      def command(table, row, *args)
        started_at = Time.now
        formatter.header(["COLUMN", "CELL"])
        table(table).get(row, *args) { |column, value| formatter.row([ column, value ]) }
        formatter.footer(started_at)
      end
    end
  end
end

View File

@ -0,0 +1,23 @@
module Shell
  module Commands
    # Shell command that forwards to the table's +incr+ method.
    class Incr < Command
      # Usage text for the command.
      def help
        <<-EOF
Increments a cell 'value' at specified table/row/column coordinates.
To increment a cell value in table 't1' at row 'r1' under column
'c1' by 1 (can be omitted) or 10 do:
hbase> incr 't1', 'r1', 'c1'
hbase> incr 't1', 'r1', 'c1', 1
hbase> incr 't1', 'r1', 'c1', 10
EOF
      end

      # +value+ is passed through as nil when omitted.
      def command(table, row, column, value = nil)
        format_simple_command { table(table).incr(row, column, value) }
      end
    end
  end
end

View File

@ -0,0 +1,17 @@
module Shell
  module Commands
    # Shell command that prints the tables returned by +admin.list+.
    class List < Command
      # Usage text for the command.
      def help
        <<-EOF
List all tables in hbase
EOF
      end

      # Prints one row per table name. Previously the result of +admin.list+
      # was discarded, so the command produced no listing at all.
      def command
        now = Time.now
        formatter.header([ "TABLE" ])
        admin.list.each { |name| formatter.row([ name ]) }
        # The formatter counts the rows printed since the header.
        formatter.footer(now)
      end
    end
  end
end

View File

@ -0,0 +1,18 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.major_compact+.
    class MajorCompact < Command
      # Usage text for the command.
      def help
        <<-EOF
Run major compaction on passed table or pass a region row
to major compact an individual region
EOF
      end

      # Accepts either a table name or a region name.
      def command(table_or_region_name)
        format_simple_command { admin.major_compact(table_or_region_name) }
      end
    end
  end
end

View File

@ -0,0 +1,21 @@
module Shell
  module Commands
    # Shell command that forwards to the table's +put+ method.
    class Put < Command
      # Usage text for the command.
      def help
        <<-EOF
Put a cell 'value' at specified table/row/column and optionally
timestamp coordinates. To put a cell value into table 't1' at
row 'r1' under column 'c1' marked with the time 'ts1', do:
hbase> put 't1', 'r1', 'c1', 'value', ts1
EOF
      end

      # +timestamp+ is passed through as nil when omitted.
      def command(table, row, column, value, timestamp = nil)
        format_simple_command { table(table).put(row, column, value, timestamp) }
      end
    end
  end
end

View File

@ -0,0 +1,37 @@
module Shell
  module Commands
    # Shell command that prints every (row, cells) pair yielded by the
    # table's +scan+ method.
    class Scan < Command
      # Usage text for the command.
      def help
        <<-EOF
Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of
the following: LIMIT, STARTROW, STOPROW, TIMESTAMP, or COLUMNS. If
no columns are specified, all columns will be scanned. To scan all
members of a column family, leave the qualifier empty as in
'col_family:'. Examples:
hbase> scan '.META.'
hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}
EOF
      end

      # Emits a ROW/COLUMN+CELL header, one row per yielded pair, then a
      # footer carrying the value returned by +scan+.
      def command(table, args = {})
        started_at = Time.now
        formatter.header(["ROW", "COLUMN+CELL"])
        total = table(table).scan(args) { |row, cells| formatter.row([ row, cells ]) }
        formatter.footer(started_at, total)
      end
    end
  end
end

View File

@ -0,0 +1,15 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.shutdown+.
    class Shutdown < Command
      # Usage text for the command.
      def help
        <<-EOF
Shut down the cluster.
EOF
      end

      # Calls +admin.shutdown+ directly, without header/footer output.
      def command
        admin.shutdown
      end
    end
  end
end

View File

@ -0,0 +1,17 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.split+.
    class Split < Command
      # Usage text for the command.
      def help
        <<-EOF
Split table or pass a region row to split individual region
EOF
      end

      # Accepts either a table name or a region name.
      def command(table_or_region_name)
        format_simple_command { admin.split(table_or_region_name) }
      end
    end
  end
end

View File

@ -0,0 +1,21 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.status+.
    class Status < Command
      # Usage text for the command.
      def help
        <<-EOF
Show cluster status. Can be 'summary', 'simple', or 'detailed'. The
default is 'summary'. Examples:
hbase> status
hbase> status 'simple'
hbase> status 'summary'
hbase> status 'detailed'
EOF
      end

      # +format+ defaults to 'summary' and is handed to +admin.status+ as is.
      def command(format = 'summary')
        admin.status(format)
      end
    end
  end
end

View File

@ -0,0 +1,19 @@
module Shell
  module Commands
    # Shell command that forwards to +admin.truncate+ and echoes every
    # log line it yields.
    class Truncate < Command
      # Usage text for the command.
      def help
        <<-EOF
Disables, drops and recreates the specified table.
EOF
      end

      # Announces the operation, then prints each yielded log entry.
      def command(table)
        format_simple_command do
          puts "Truncating '#{table}' table (it may take a while):"
          admin.truncate(table) do |log|
            puts " - #{log}"
          end
        end
      end
    end
  end
end

View File

@ -0,0 +1,18 @@
module Shell
  module Commands
    # Shell command that prints version, revision and date taken from
    # org.apache.hadoop.hbase.util.VersionInfo.
    class Version < Command
      # Usage text for the command.
      def help
        <<-EOF
Output this HBase version
EOF
      end

      # Writes a single "Version: ..." line to standard output.
      def command
        version  = org.apache.hadoop.hbase.util.VersionInfo.getVersion()
        revision = org.apache.hadoop.hbase.util.VersionInfo.getRevision()
        date     = org.apache.hadoop.hbase.util.VersionInfo.getDate()
        puts "Version: #{version}, r#{revision}, #{date}"
      end
    end
  end
end

View File

@ -0,0 +1,16 @@
module Shell
  module Commands
    # Shell command that forwards its argument list to +admin.zk+.
    class Zk < Command
      # Usage text for the command.
      def help
        <<-EOF
Low level ZooKeeper surgery tools. Type "zk 'help'" for more
information (Yes, you must quote 'help').
EOF
      end

      # The collected arguments are handed over as one array, not splatted.
      def command(*args)
        admin.zk(args)
      end
    end
  end
end

View File

@ -0,0 +1,15 @@
module Shell
  module Commands
    # Shell command that prints the value returned by +admin.zk_dump+.
    class ZkDump < Command
      # Usage text for the command.
      def help
        <<-EOF
Dump status of HBase cluster as seen by ZooKeeper.
EOF
      end

      # Writes the dump to standard output.
      def command
        dump_text = admin.zk_dump
        puts dump_text
      end
    end
  end
end

View File

@ -0,0 +1,127 @@
# Results formatter
module Shell
  module Formatter
    # Base abstract class for results formatting.
    #
    # Writes rows to an output stream (an IO or the Kernel module) and keeps
    # a count of the rows printed since the last header.
    class Base
      # Number of rows printed since the last call to +header+.
      attr_reader :row_count

      # True when +obj+ is an IO (including subclasses such as File) or the
      # Kernel module itself.
      def is_valid_io?(obj)
        obj.is_a?(IO) || obj == Kernel
      end

      # Takes an output stream and a print width.
      #
      # Recognised options:
      #   :output_stream - IO or Kernel (default Kernel)
      #   :format_width  - total width used for column layout (default 100)
      #
      # Raises TypeError when the stream is neither an IO nor Kernel.
      def initialize(opts = {})
        options = {
          :output_stream => Kernel,
          :format_width => 100
        }.merge(opts)

        @out = options[:output_stream]
        @max_width = options[:format_width]
        @row_count = 0

        # raise an error if the stream is not valid
        raise(TypeError, "Type #{@out.class} of parameter #{@out} is not IO") unless is_valid_io?(@out)
      end

      # Prints +args+ as a non-inset row (when not empty) and resets the
      # row counter.
      def header(args = [], widths = [])
        row(args, false, widths) if args.length > 0
        @row_count = 0
      end

      # Output a row.
      # Inset is whether or not to offset row by a space.
      #
      # +args+ may be a String (printed as is, not counted as a row) or an
      # array of one, two or more cells. +widths+ holds optional column
      # widths in percent of the total width for the two-column layout.
      def row(args = [], inset = true, widths = [])
        # Print out nothing
        return if !args || args.empty?

        # Print a string
        if args.is_a?(String)
          output(@max_width, args)
          @out.puts
          return
        end

        # TODO: Look at the type. Is it RowResult?
        case args.length
        when 1
          split(@max_width, dump(args[0])).each do |line|
            output(@max_width, line)
            @out.puts
          end
        when 2
          print_two_columns(args, inset, widths)
        else
          # Print a space to set off multi-column rows. Everything goes to
          # the configured stream, never straight to standard output.
          @out.print(' ')
          args.each_with_index do |cell, position|
            @out.print(" ") unless position.zero?
            @out.print(cell)
          end
          @out.puts
        end

        @row_count += 1
      end

      # Cuts +str+ into consecutive pieces of at most +width+ characters.
      def split(width, str)
        result = []
        index = 0
        while index < str.length do
          result << str.slice(index, width)
          index += width
        end
        result
      end

      # Returns the printable form of a cell: integers are converted to
      # their decimal string (they used to become nil, which made +split+
      # fail), everything else is returned unchanged.
      def dump(str)
        return str.to_s if str.is_a?(Integer)
        str
      end

      # Prints +str+ left-justified and padded to +width+ characters.
      def output(width, str)
        # Make up a spec for printf
        spec = "%%-%ds" % width
        @out.printf(spec, str)
      end

      # Prints the row count and the time elapsed since +start_time+.
      # Nothing is printed unless +start_time+ is given; +row_count+
      # defaults to the formatter's own counter.
      def footer(start_time = nil, row_count = nil)
        return unless start_time
        row_count ||= @row_count
        # Only output elapsed time and row count if startTime passed
        @out.puts("%d row(s) in %.4f seconds" % [row_count, Time.now - start_time])
      end

      private

      # Lays out a two-cell row; cells longer than their column wrap onto
      # additional lines.
      def print_two_columns(args, inset, widths)
        col1width = (!widths || widths.length == 0) ? @max_width / 4 : @max_width * widths[0] / 100
        col2width = (!widths || widths.length < 2) ? @max_width - col1width - 2 : @max_width * widths[1] / 100 - 2
        splits1 = split(col1width, dump(args[0]))
        splits2 = split(col2width, dump(args[1]))

        line_count = [splits1.length, splits2.length].max
        line_count.times do |index|
          # Inset by one space if inset is set.
          @out.print(" ") if inset
          output(col1width, splits1[index])
          # Add extra space so second column lines up w/ second column output
          @out.print(" ") unless inset
          @out.print(" ")
          output(col2width, splits2[index])
          @out.puts
        end
      end
    end

    class Console < Base
    end

    class XHTMLFormatter < Base
      # http://www.germane-software.com/software/rexml/doc/classes/REXML/Document.html
      # http://www.crummy.com/writing/RubyCookbook/test_results/75942.html
    end

    class JSON < Base
    end
  end
end

View File

@ -0,0 +1,72 @@
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.jruby.embed.ScriptingContainer;
import org.jruby.embed.PathType;
/**
 * Runs the Ruby shell test suite inside an embedded JRuby container.
 *
 * A mini cluster is started once for the whole class and exposed to the
 * Ruby side through the <code>$TEST_CLUSTER</code> global; the single test
 * then executes <code>src/test/ruby/tests_runner.rb</code>.
 *
 * @author scoundrel
 */
public class TestShell {
  final Log LOG = LogFactory.getLog(getClass());

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private final static ScriptingContainer jruby = new ScriptingContainer();

  /**
   * Starts the mini cluster and configures the JRuby load paths.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Start mini cluster
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100);
    TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250);
    TEST_UTIL.getConfiguration().setInt("hbase.client.retries.number", 6);
    TEST_UTIL.startMiniCluster();

    // Configure jruby runtime
    // Parameterized list: the raw ArrayList caused an unchecked conversion.
    List<String> loadPaths = new ArrayList<String>();
    loadPaths.add("src/main/ruby");
    loadPaths.add("src/test/ruby");
    jruby.getProvider().setLoadPaths(loadPaths);
    jruby.put("$TEST_CLUSTER", TEST_UTIL);
  }

  /**
   * Shuts the mini cluster down.
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Executes the Ruby test runner script.
   */
  @Test
  public void testRunShellTests() throws IOException {
    // Start all ruby tests
    jruby.runScriptlet(PathType.ABSOLUTE, "src/test/ruby/tests_runner.rb");
  }
}

View File

@ -0,0 +1,263 @@
require 'hbase'
include HBaseConstants
module Hbase
# Tests for the admin predicate helpers (exists?, enabled?).
class AdminHelpersTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
    # Create test table if it does not exist
    @test_name = "hbase_shell_tests_table"
    create_test_table(@test_name)
  end

  define_test "exists? should return true when a table exists" do
    assert(admin.exists?('.META.'))
  end

  # The name used to read "when a table exists", contradicting the assertion.
  define_test "exists? should return false when a table does not exist" do
    assert(!admin.exists?('.NOT.EXISTS.'))
  end

  define_test "enabled? should return true for enabled tables" do
    admin.enable(@test_name)
    assert(admin.enabled?(@test_name))
  end

  define_test "enabled? should return false for disabled tables" do
    admin.disable(@test_name)
    assert(!admin.enabled?(@test_name))
  end
end
# Simple administration methods tests
class AdminMethodsTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
    # Create test table if it does not exist
    @test_name = "hbase_shell_tests_table"
    create_test_table(@test_name)

    # Name of the table used by the create tests
    @create_test_name = 'hbase_create_table_test_table'
  end

  define_test "list should return a list of tables" do
    assert(admin.list.member?(@test_name))
  end

  define_test "list should not return meta tables" do
    assert(!admin.list.member?('.META.'))
    assert(!admin.list.member?('-ROOT-'))
  end

  #-------------------------------------------------------------------------------

  define_test "flush should work" do
    admin.flush('.META.')
  end

  #-------------------------------------------------------------------------------

  define_test "compact should work" do
    admin.compact('.META.')
  end

  #-------------------------------------------------------------------------------

  define_test "major_compact should work" do
    admin.major_compact('.META.')
  end

  #-------------------------------------------------------------------------------

  define_test "split should work" do
    admin.split('.META.')
  end

  #-------------------------------------------------------------------------------

  define_test "drop should fail on non-existent tables" do
    assert_raise(ArgumentError) do
      admin.drop('.NOT.EXISTS.')
    end
  end

  define_test "drop should fail on enabled tables" do
    assert_raise(ArgumentError) do
      admin.drop(@test_name)
    end
  end

  define_test "drop should drop tables" do
    admin.disable(@test_name)
    admin.drop(@test_name)
    assert(!admin.exists?(@test_name))
  end

  #-------------------------------------------------------------------------------

  define_test "zk_dump should work" do
    assert_not_nil(admin.zk_dump)
  end

  #-------------------------------------------------------------------------------

  define_test "create should fail with non-string table names" do
    assert_raise(ArgumentError) do
      admin.create(123, 'xxx')
    end
  end

  define_test "create should fail with non-string/non-hash column args" do
    assert_raise(ArgumentError) do
      admin.create(@create_test_name, 123)
    end
  end

  define_test "create should fail without columns" do
    drop_test_table(@create_test_name)
    assert_raise(ArgumentError) do
      admin.create(@create_test_name)
    end
  end

  define_test "create should work with string column args" do
    drop_test_table(@create_test_name)
    admin.create(@create_test_name, 'a', 'b')
    assert_equal(['a:', 'b:'], table(@create_test_name).get_all_columns.sort)
  end

  # Test name used to read "create hould work ..." (typo).
  define_test "create should work with hash column args" do
    drop_test_table(@create_test_name)
    admin.create(@create_test_name, { NAME => 'a'}, { NAME => 'b'})
    assert_equal(['a:', 'b:'], table(@create_test_name).get_all_columns.sort)
  end

  #-------------------------------------------------------------------------------

  # define_test "close should work without region server name" do
  #   if admin.exists?(@create_test_name)
  #     admin.disable(@create_test_name)
  #     admin.drop(@create_test_name)
  #   end
  #   admin.create(@create_test_name, 'foo')
  #   admin.close_region(@create_test_name + ',,0')
  # end

  #-------------------------------------------------------------------------------

  define_test "describe should fail for non-existent tables" do
    assert_raise(ArgumentError) do
      admin.describe('.NOT.EXISTS.')
    end
  end

  define_test "describe should return a description" do
    assert_not_nil admin.describe(@test_name)
  end

  #-------------------------------------------------------------------------------

  define_test "truncate should empty a table" do
    table(@test_name).put(1, "x:a", 1)
    table(@test_name).put(2, "x:a", 2)
    assert_equal(2, table(@test_name).count)
    admin.truncate(@test_name)
    assert_equal(0, table(@test_name).count)
  end

  define_test "truncate should yield log records" do
    logs = []
    admin.truncate(@test_name) do |log|
      assert_kind_of(String, log)
      logs << log
    end
    assert(!logs.empty?)
  end
end
# Alter table tests
class AdminAlterTableTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
    # Start every test from a freshly created test table
    @test_name = "hbase_shell_tests_table"
    drop_test_table(@test_name)
    create_test_table(@test_name)
  end

  #-------------------------------------------------------------------------------

  define_test "alter should fail with non-string table names" do
    assert_raise(ArgumentError) do
      admin.alter(123, METHOD => 'delete', NAME => 'y')
    end
  end

  define_test "alter should fail with non-existing tables" do
    assert_raise(ArgumentError) do
      admin.alter('.NOT.EXISTS.', METHOD => 'delete', NAME => 'y')
    end
  end

  define_test "alter should fail with enabled tables" do
    assert_raise(ArgumentError) do
      admin.alter(@test_name, METHOD => 'delete', NAME => 'y')
    end
  end

  define_test "alter should be able to delete column families" do
    assert_equal(['x:', 'y:'], table(@test_name).get_all_columns.sort)
    admin.disable(@test_name)
    admin.alter(@test_name, METHOD => 'delete', NAME => 'y')
    admin.enable(@test_name)
    assert_equal(['x:'], table(@test_name).get_all_columns.sort)
  end

  define_test "alter should be able to add column families" do
    assert_equal(['x:', 'y:'], table(@test_name).get_all_columns.sort)
    admin.disable(@test_name)
    admin.alter(@test_name, NAME => 'z')
    admin.enable(@test_name)
    assert_equal(['x:', 'y:', 'z:'], table(@test_name).get_all_columns.sort)
  end

  define_test "alter should be able to add column families (name-only alter spec)" do
    assert_equal(['x:', 'y:'], table(@test_name).get_all_columns.sort)
    admin.disable(@test_name)
    admin.alter(@test_name, 'z')
    admin.enable(@test_name)
    assert_equal(['x:', 'y:', 'z:'], table(@test_name).get_all_columns.sort)
  end

  define_test "alter should support more than one alteration in one call" do
    assert_equal(['x:', 'y:'], table(@test_name).get_all_columns.sort)
    admin.disable(@test_name)
    admin.alter(@test_name, { NAME => 'z' }, { METHOD => 'delete', NAME => 'y' })
    admin.enable(@test_name)
    assert_equal(['x:', 'z:'], table(@test_name).get_all_columns.sort)
  end

  define_test 'alter should support shortcut DELETE alter specs' do
    assert_equal(['x:', 'y:'], table(@test_name).get_all_columns.sort)
    admin.disable(@test_name)
    admin.alter(@test_name, 'delete' => 'y')
    # Re-enable after altering, like the sibling tests (was a second disable).
    admin.enable(@test_name)
    assert_equal(['x:'], table(@test_name).get_all_columns.sort)
  end

  define_test "alter should be able to change table options" do
    admin.disable(@test_name)
    admin.alter(@test_name, METHOD => 'table_att', 'MAX_FILESIZE' => 12345678)
    # Re-enable after altering, like the sibling tests (was a second disable).
    admin.enable(@test_name)
    assert_match(/12345678/, admin.describe(@test_name))
  end
end
end

View File

@ -0,0 +1,30 @@
require 'hbase'
module Hbase
  # Checks the factory methods of ::Hbase::Hbase.
  class HbaseTest < Test::Unit::TestCase
    # Builds a console formatter and an Hbase object from the test cluster
    # configuration.
    def setup
      @formatter = Shell::Formatter::Console.new(:format_width => 110)
      @hbase = ::Hbase::Hbase.new($TEST_CLUSTER.getConfiguration)
    end

    define_test "Hbase::Hbase constructor should initialize hbase configuration object" do
      configuration = @hbase.configuration
      assert_kind_of(org.apache.hadoop.conf.Configuration, configuration)
    end

    define_test "Hbase::Hbase#admin should create a new admin object when called the first time" do
      admin_object = @hbase.admin(@formatter)
      assert_kind_of(::Hbase::Admin, admin_object)
    end

    define_test "Hbase::Hbase#admin should create a new admin object every call" do
      first = @hbase.admin(@formatter)
      second = @hbase.admin(@formatter)
      assert_not_same(first, second)
    end

    define_test "Hbase::Hbase#table should create a new table object when called the first time" do
      table_object = @hbase.table('.META.', @formatter)
      assert_kind_of(::Hbase::Table, table_object)
    end

    define_test "Hbase::Hbase#table should create a new table object every call" do
      first = @hbase.table('.META.', @formatter)
      second = @hbase.table('.META.', @formatter)
      assert_not_same(first, second)
    end
  end
end

View File

@ -0,0 +1,383 @@
require 'hbase'
include HBaseConstants
module Hbase
# Constructor tests
class TableConstructorTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
  end

  define_test "Hbase::Table constructor should fail for non-existent tables" do
    assert_raise(NativeException) { table('non-existent-table-name') }
  end

  define_test "Hbase::Table constructor should not fail for existent tables" do
    assert_nothing_raised { table('.META.') }
  end
end
# Helper methods tests
class TableHelpersTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
    # Make sure the shared test table is there and keep a handle on it
    @test_name = "hbase_shell_tests_table"
    create_test_table(@test_name)
    @test_table = table(@test_name)
  end

  define_test "is_meta_table? method should return true for the meta table" do
    meta = table('.META.')
    assert(meta.is_meta_table?)
  end

  define_test "is_meta_table? method should return true for the root table" do
    root = table('-ROOT-')
    assert(root.is_meta_table?)
  end

  define_test "is_meta_table? method should return false for a normal table" do
    assert(!@test_table.is_meta_table?)
  end

  #-------------------------------------------------------------------------------

  define_test "get_all_columns should return columns list" do
    columns = table('.META.').get_all_columns
    assert_kind_of(Array, columns)
    assert(columns.length > 0)
  end

  #-------------------------------------------------------------------------------

  define_test "parse_column_name should not return a qualifier for name-only column specifiers" do
    family, qualifier = table('.META.').parse_column_name('foo')
    assert_not_nil(family)
    assert_nil(qualifier)
  end

  define_test "parse_column_name should not return a qualifier for family-only column specifiers" do
    family, qualifier = table('.META.').parse_column_name('foo:')
    assert_not_nil(family)
    assert_nil(qualifier)
  end

  define_test "parse_column_name should return a qualifier for family:qualifier column specifiers" do
    family, qualifier = table('.META.').parse_column_name('foo:bar')
    assert_not_nil(family)
    assert_not_nil(qualifier)
  end
end
# Simple data management methods tests
class TableSimpleMethodsTest < Test::Unit::TestCase
  include TestHelpers

  def setup
    setup_hbase
    # Make sure the shared test table is there and keep a handle on it
    @test_name = "hbase_shell_tests_table"
    create_test_table(@test_name)
    @test_table = table(@test_name)
  end

  define_test "put should work without timestamp" do
    @test_table.put('123', 'x:a', '1')
  end

  define_test "put should work with timestamp" do
    timestamp = Time.now.to_i
    @test_table.put('123', 'x:a', '2', timestamp)
  end

  define_test "put should work with integer keys" do
    @test_table.put(123, 'x:a', '3')
  end

  define_test "put should work with integer values" do
    @test_table.put('123', 'x:a', 4)
  end

  #-------------------------------------------------------------------------------

  define_test "delete should work without timestamp" do
    @test_table.delete('123', 'x:a')
  end

  define_test "delete should work with timestamp" do
    timestamp = Time.now.to_i
    @test_table.delete('123', 'x:a', timestamp)
  end

  define_test "delete should work with integer keys" do
    @test_table.delete(123, 'x:a')
  end

  #-------------------------------------------------------------------------------

  define_test "deleteall should work w/o columns and timestamps" do
    @test_table.deleteall('123')
  end

  define_test "deleteall should work with integer keys" do
    @test_table.deleteall(123)
  end

  #-------------------------------------------------------------------------------

  define_test "incr should work w/o value" do
    @test_table.incr('123', 'x:cnt1')
  end

  define_test "incr should work with value" do
    @test_table.incr('123', 'x:cnt2', 10)
  end

  define_test "incr should work with integer keys" do
    @test_table.incr(123, 'x:cnt3')
  end
end
# Complex data management methods tests
class TableComplexMethodsTest < Test::Unit::TestCase
include TestHelpers
# Prepares the fixture shared by the tests of this class: the test table,
# a handle on it, and two rows ("1" and "2") with cells in x:a and x:b.
def setup
setup_hbase
# Create test table if it does not exist
@test_name = "hbase_shell_tests_table"
create_test_table(@test_name)
@test_table = table(@test_name)
# Test data
# The x:b cells are written with the explicit timestamp @test_ts, the x:a
# cells without a timestamp argument.
@test_ts = 12345678
@test_table.put(1, "x:a", 1)
@test_table.put(1, "x:b", 2, @test_ts)
@test_table.put(2, "x:a", 11)
@test_table.put(2, "x:b", 12, @test_ts)
end
define_test "count should work w/o a block passed" do
  assert(@test_table.count > 0)
end

define_test "count should work with a block passed (and yield)" do
  rows = []
  # The block parameter no longer reuses the name of the local that
  # receives the result; the running count itself is not needed here.
  total = @test_table.count(1) do |_current, row|
    rows << row
  end
  assert(total > 0)
  assert(!rows.empty?)
end
#-------------------------------------------------------------------------------
define_test "get should work w/o columns specification" do
  cells = @test_table.get('1')
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  %w[x:a x:b].each { |column| assert_not_nil(cells[column]) }
end

define_test "get should work with integer keys" do
  cells = @test_table.get(1)
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  %w[x:a x:b].each { |column| assert_not_nil(cells[column]) }
end

define_test "get should work with hash columns spec and a single string COLUMN parameter" do
  cells = @test_table.get('1', COLUMN => 'x:a')
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  assert_not_nil(cells['x:a'])
  assert_nil(cells['x:b'])
end

define_test "get should work with hash columns spec and a single string COLUMNS parameter" do
  cells = @test_table.get('1', COLUMNS => 'x:a')
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  assert_not_nil(cells['x:a'])
  assert_nil(cells['x:b'])
end

define_test "get should work with hash columns spec and an array of strings COLUMN parameter" do
  cells = @test_table.get('1', COLUMN => [ 'x:a', 'x:b' ])
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  %w[x:a x:b].each { |column| assert_not_nil(cells[column]) }
end

define_test "get should work with hash columns spec and an array of strings COLUMNS parameter" do
  cells = @test_table.get('1', COLUMNS => [ 'x:a', 'x:b' ])
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  %w[x:a x:b].each { |column| assert_not_nil(cells[column]) }
end

define_test "get should work with hash columns spec and TIMESTAMP only" do
  # Only the cell stored with @test_ts is expected back.
  cells = @test_table.get('1', TIMESTAMP => @test_ts)
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  assert_nil(cells['x:a'])
  assert_not_nil(cells['x:b'])
end
define_test "get should fail with hash columns spec and strange COLUMN value" do
  # A Hash is not an acceptable COLUMN value.
  assert_raise(ArgumentError) { @test_table.get('1', COLUMN => {}) }
end
define_test "get should fail with hash columns spec and strange COLUMNS value" do
  # Exercise the plural COLUMNS key here; the singular COLUMN key is covered by
  # the sibling "strange COLUMN value" test.
  assert_raise(ArgumentError) do
    @test_table.get('1', COLUMNS => {})
  end
end
define_test "get should fail with hash columns spec and no TIMESTAMP or COLUMN[S]" do
  # A hash carrying none of the recognised keys must be rejected.
  assert_raise(ArgumentError) { @test_table.get('1', { :foo => :bar }) }
end
define_test "get should work with a string column spec" do
  # The column is given as a bare string instead of an options hash.
  cells = @test_table.get('1', 'x:b')
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  assert_nil(cells['x:a'])
  assert_not_nil(cells['x:b'])
end
define_test "get should work with an array columns spec" do
  # Columns are passed as separate positional string arguments.
  cells = @test_table.get('1', 'x:a', 'x:b')
  assert_not_nil(cells)
  assert_kind_of(Hash, cells)
  %w[x:a x:b].each { |column| assert_not_nil(cells[column]) }
end
define_test "get should work with an array of arrays columns spec (yeah, crazy)" do
  # Each column arrives wrapped in its own single-element array.
  res = @test_table.get('1', ['x:a'], ['x:b'])
  assert_not_nil(res)
  assert_kind_of(Hash, res)
  assert_not_nil(res['x:a'])
  assert_not_nil(res['x:b'])
end
define_test "get with a block should yield (column, value) pairs" do
  # Collect everything the block receives, keyed by column name.
  res = {}
  @test_table.get('1') { |col, val| res[col] = val }
  # assert_equal takes (expected, actual); keeping that order makes the
  # failure message report the values the right way round.
  assert_equal([ 'x:a', 'x:b' ], res.keys.sort)
end
#-------------------------------------------------------------------------------
define_test "scan should work w/o any params" do
  # An unrestricted scan must return rows '1' and '2' with both columns.
  rows = @test_table.scan
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  %w[1 2].each do |key|
    assert_not_nil(rows[key])
    assert_not_nil(rows[key]['x:a'])
    assert_not_nil(rows[key]['x:b'])
  end
end
define_test "scan should support STARTROW parameter" do
  # Starting at row '2' must leave row '1' out of the result.
  rows = @test_table.scan(STARTROW => '2')
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  assert_nil(rows['1'])
  second = rows['2']
  assert_not_nil(second)
  assert_not_nil(second['x:a'])
  assert_not_nil(second['x:b'])
end
define_test "scan should support STOPROW parameter" do
  # Stopping at row '2' must return row '1' only.
  rows = @test_table.scan(STOPROW => '2')
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  first = rows['1']
  assert_not_nil(first)
  assert_not_nil(first['x:a'])
  assert_not_nil(first['x:b'])
  assert_nil(rows['2'])
end
define_test "scan should support LIMIT parameter" do
  # With a limit of one row, only row '1' is expected back.
  rows = @test_table.scan(LIMIT => 1)
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  first = rows['1']
  assert_not_nil(first)
  assert_not_nil(first['x:a'])
  assert_not_nil(first['x:b'])
  assert_nil(rows['2'])
end
define_test "scan should support TIMESTAMP parameter" do
  # At @test_ts every row is expected to expose x:b but not x:a.
  rows = @test_table.scan(TIMESTAMP => @test_ts)
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  %w[1 2].each do |key|
    assert_not_nil(rows[key])
    assert_nil(rows[key]['x:a'])
    assert_not_nil(rows[key]['x:b'])
  end
end
define_test "scan should support COLUMNS parameter with an array of columns" do
  # Both columns are requested, so both must be present in every row.
  wanted = [ 'x:a', 'x:b' ]
  rows = @test_table.scan(COLUMNS => wanted)
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  %w[1 2].each do |key|
    assert_not_nil(rows[key])
    assert_not_nil(rows[key]['x:a'])
    assert_not_nil(rows[key]['x:b'])
  end
end
define_test "scan should support COLUMNS parameter with a single column name" do
  # Only x:a is requested, so x:b must be missing from every row.
  rows = @test_table.scan(COLUMNS => 'x:a')
  assert_not_nil(rows)
  assert_kind_of(Hash, rows)
  %w[1 2].each do |key|
    assert_not_nil(rows[key])
    assert_not_nil(rows[key]['x:a'])
    assert_nil(rows[key]['x:b'])
  end
end
define_test "scan should fail on invalid COLUMNS parameter types" do
  # A Hash is not an acceptable COLUMNS value.
  assert_raise(ArgumentError) { @test_table.scan(COLUMNS => {}) }
end
define_test "scan should fail on non-hash params" do
  # scan only understands an options hash; an Integer must be rejected.
  assert_raise(ArgumentError) { @test_table.scan(123) }
end
define_test "scan with a block should yield rows and return rows counter" do
  # Remember every (row, cells) pair handed to the block, then compare the
  # number of distinct rows seen with the value scan returned.
  yielded = {}
  returned_count = @test_table.scan do |row, cells|
    yielded[row] = cells
  end
  assert_equal(yielded.keys.size, returned_count)
end
end
end

View File

@ -0,0 +1,14 @@
require 'shell'
require 'shell/formatter'
# Generates, for every entry in Shell.commands, a pair of tests checking that
# an instance of the command class responds to #help and #command.
class ShellCommandsTest < Test::Unit::TestCase
  Shell.commands.each do |command_name, command_class|
    define_test "#{command_name} command class #{command_class} should respond to help" do
      command = command_class.new(nil)
      assert_respond_to(command, :help)
    end

    define_test "#{command_name} command class #{command_class} should respond to :command" do
      command = command_class.new(nil)
      assert_respond_to(command, :command)
    end
  end
end

View File

@ -0,0 +1,49 @@
require 'shell/formatter'
# Smoke tests for Shell::Formatter::Base: constructor validation of the
# :output_stream option and the header/row/footer printing methods.
class ShellFormatterTest < Test::Unit::TestCase
  # Helper method to construct a fresh formatter that prints to STDOUT
  def formatter
    Shell::Formatter::Base.new(:output_stream => STDOUT)
  end

  #
  # Constructor tests
  #
  define_test "Formatter constructor should not raise error on valid IO streams" do
    assert_nothing_raised do
      Shell::Formatter::Base.new(:output_stream => STDOUT)
    end
  end

  define_test "Formatter constructor should not raise error when no IO stream passed" do
    assert_nothing_raised do
      Shell::Formatter::Base.new()
    end
  end

  define_test "Formatter constructor should raise error on non-IO streams" do
    assert_raise TypeError do
      Shell::Formatter::Base.new(:output_stream => 'foostring')
    end
  end

  #-------------------------------------------------------------------------------------------------------
  # Printing methods tests
  # FIXME: The tests are just checking that the code has no typos, try to figure out a better way to test
  #
  define_test "Formatter#header should work" do
    formatter.header(['a', 'b'])
    formatter.header(['a', 'b'], [10, 20])
  end

  define_test "Formatter#row should work" do
    formatter.row(['a', 'b'])
    formatter.row(['xxxxxxxxx xxxxxxxxxxx xxxxxxxxxxx xxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxx xxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxxxx'])
    formatter.row(['yyyyyy yyyyyy yyyyy yyy', 'xxxxxxxxx xxxxxxxxxxx xxxxxxxxxxx xxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxx xxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxxxxxx xxx xx x xx xxx xx xx xx x xx x x xxx x x xxx x x xx x x x x x x xx '])
    formatter.row(["NAME => 'table1', FAMILIES => [{NAME => 'fam2', VERSIONS => 3, COMPRESSION => 'NONE', IN_MEMORY => false, BLOCKCACHE => false, LENGTH => 2147483647, TTL => FOREVER, BLOOMFILTER => NONE}, {NAME => 'fam1', VERSIONS => 3, COMPRESSION => 'NONE', IN_MEMORY => false, BLOCKCACHE => false, LENGTH => 2147483647, TTL => FOREVER, BLOOMFILTER => NONE}]"])
  end

  define_test "Formatter#footer should work" do
    # Pass a start time five seconds in the past.
    formatter.footer(Time.now - 5)
  end
end

View File

@ -0,0 +1,50 @@
require 'hbase'
require 'shell'
require 'shell/formatter'
# Tests for Shell::Shell: the admin/table accessors, exporting commands onto
# another object, and looking up / running a command by name.
class ShellTest < Test::Unit::TestCase
  # Builds the formatter, the Hbase wrapper and the shell under test.
  def setup
    @formatter = ::Shell::Formatter::Console.new(:format_width => 110)
    @hbase = ::Hbase::Hbase.new
    @shell = Shell::Shell.new(@hbase, @formatter)
  end

  define_test "Shell::Shell#hbase_admin should return an admin instance" do
    admin = @shell.hbase_admin
    assert_kind_of(Hbase::Admin, admin)
  end

  define_test "Shell::Shell#hbase_admin should cache admin instances" do
    first = @shell.hbase_admin
    second = @shell.hbase_admin
    assert_same(first, second)
  end

  #-------------------------------------------------------------------------------

  define_test "Shell::Shell#hbase_table should return a table instance" do
    meta = @shell.hbase_table('.META.')
    assert_kind_of(Hbase::Table, meta)
  end

  define_test "Shell::Shell#hbase_table should not cache table instances" do
    first = @shell.hbase_table('.META.')
    second = @shell.hbase_table('.META.')
    assert_not_same(first, second)
  end

  #-------------------------------------------------------------------------------

  define_test "Shell::Shell#export_commands should export command methods to specified object" do
    # Foo starts out as an empty module, so it must not know :version yet.
    module Foo; end
    known_before = Foo.respond_to?(:version)
    assert(!known_before)
    @shell.export_commands(Foo)
    assert(Foo.respond_to?(:version))
  end

  #-------------------------------------------------------------------------------

  define_test "Shell::Shell#command_instance should return a command class" do
    version_command = @shell.command_instance('version')
    assert_kind_of(Shell::Commands::Command, version_command)
  end

  #-------------------------------------------------------------------------------

  define_test "Shell::Shell#command should execute a command" do
    # Smoke test: passes as long as running the command does not raise.
    @shell.command('version')
  end
end

View File

@ -0,0 +1,72 @@
require 'test/unit'
module Testing
  # Class-level macro for declaring tests by description instead of by
  # method name. Intended to be mixed in with +extend+.
  module Declarative
    # Defines an instance method called "test_<name>", with every run of
    # whitespace in +name+ replaced by a single underscore.
    #
    #   define_test "should do something" do
    #     ...
    #   end
    #
    # name  - description of the test.
    # block - body of the test; when omitted, the generated test calls +flunk+.
    #
    # Raises RuntimeError when a method with the generated name (public,
    # protected or private, inherited ones included) already exists.
    def define_test(name, &block)
      test_name = "test_#{name.gsub(/\s+/, '_')}".to_sym

      taken = method_defined?(test_name) || private_method_defined?(test_name)
      raise "#{test_name} is already defined in #{self}" if taken

      if block
        define_method(test_name, &block)
      else
        define_method(test_name) { flunk "No implementation provided for #{name}" }
      end
    end
  end
end
module Hbase
  # Helpers mixed into test cases that need an Hbase wrapper and a scratch
  # table. All of them rely on @hbase and @formatter, which setup_hbase sets.
  module TestHelpers
    # Creates the console formatter and the Hbase wrapper, the latter from the
    # configuration of the global $TEST_CLUSTER.
    def setup_hbase
      @formatter = Shell::Formatter::Console.new(:format_width => 110)
      configuration = $TEST_CLUSTER.getConfiguration
      @hbase = ::Hbase::Hbase.new(configuration)
    end

    # Returns whatever @hbase.table gives back for +table+ and the formatter.
    def table(table)
      @hbase.table(table, @formatter)
    end

    # Returns whatever @hbase.admin gives back for the formatter.
    def admin
      @hbase.admin(@formatter)
    end

    # Makes sure table +name+ exists and is enabled: creates it (family 'x'
    # with 5 versions, plus family 'y') when missing, otherwise enables it if
    # it is currently disabled.
    def create_test_table(name)
      if admin.exists?(name)
        # Enable the table if needed
        admin.enable(name) unless admin.enabled?(name)
      else
        # Create the table if needed
        admin.create(name, [{'NAME' => 'x', 'VERSIONS' => 5}, 'y'])
        nil
      end
    end

    # Disables and drops table +name+ if it exists. A failure of either step
    # is printed and otherwise ignored, so a failed disable still attempts
    # the drop.
    def drop_test_table(name)
      if admin.exists?(name)
        begin
          admin.disable(name) if admin.enabled?(name)
        rescue StandardError => error
          puts "IGNORING DISABLE TABLE ERROR: #{error}"
        end

        begin
          admin.drop(name)
        rescue StandardError => error
          puts "IGNORING DROP TABLE ERROR: #{error}"
        end
      end
    end
  end
end
# Extend standard unit tests with our helpers
Test::Unit::TestCase.extend(Testing::Declarative)
# Add the $HBASE_HOME/lib/ruby directory to the ruby
# load path so I can load up my HBase ruby modules
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "..", "main", "ruby")

View File

@ -0,0 +1,18 @@
require 'rubygems'
require 'rake'
require 'test_helper'
puts "Running tests..."

# Load every *_test.rb found below this file's directory. A file that fails to
# load is reported and the error is re-raised, which aborts the whole run.
test_glob = "#{File.dirname(__FILE__)}/**/*_test.rb"
Dir[test_glob].each do |test_file|
  begin
    load(test_file)
  rescue => error
    puts "ERROR: #{error}"
    raise
  end
end

puts "Done with tests!"

View File

@ -101,6 +101,10 @@
<include>bin/**</include>
</includes>
</fileSet>
<fileSet>
<directory>core/src/main/ruby</directory>
<outputDirectory>lib/ruby</outputDirectory>
</fileSet>
<fileSet>
<directory>contrib/transactional</directory>
<outputDirectory>contrib/transactional</outputDirectory>