HBASE-6592 [shell] Add means of custom formatting output by column

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1387369 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2012-09-18 21:41:20 +00:00
parent 65a2962cdc
commit 4bd1ffbd1e
4 changed files with 111 additions and 8 deletions


@@ -113,6 +113,7 @@ EOF
      @table = org.apache.hadoop.hbase.client.HTable.new(configuration, table_name)
      @name = table_name
      @shell = shell
      @converters = Hash.new()
    end
    # Note the below methods are prefixed with '_' to hide them from the average user, as
@@ -187,7 +188,8 @@ EOF
    def _get_internal(row, *args)
      get = org.apache.hadoop.hbase.client.Get.new(row.to_s.to_java_bytes)
      maxlength = -1
      @converters.clear()
      # Normalize args
      args = args.first if args.first.kind_of?(Hash)
      if args.kind_of?(String) || args.kind_of?(Array)
@@ -299,6 +301,7 @@ EOF
      limit = args.delete("LIMIT") || -1
      maxlength = args.delete("MAXLENGTH") || -1
      @converters.clear()
      if args.any?
        filter = args["FILTER"]
@@ -450,6 +453,7 @@ EOF
    # Returns family and (when it has one) qualifier for a column name
    def parse_column_name(column)
      split = org.apache.hadoop.hbase.KeyValue.parseColumn(column.to_java_bytes)
      set_converter(split) if split.length > 1
      return split[0], (split.length > 1) ? split[1] : nil
    end
@@ -474,9 +478,42 @@ EOF
      if kv.isDelete
        val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type::codeToType(kv.getType)}"
      else
        val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv)}"
      end
      (maxlength != -1) ? val[0, maxlength] : val
    end
    def convert(column, kv)
      # use org.apache.hadoop.hbase.util.Bytes as the default class
      klazz_name = 'org.apache.hadoop.hbase.util.Bytes'
      # use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default converter
      converter = 'toStringBinary'
      if @converters.has_key?(column)
        # look up the CONVERTER registered for this column - "cf:qualifier"
        matches = /c\((.+)\)\.(.+)/.match(@converters[column])
        if matches.nil?
          # does not match the pattern 'c(className).functionName';
          # treat it as a method name on the default klazz_name
          converter = @converters[column]
        else
          klazz_name = matches[1]
          converter = matches[2]
        end
      end
      method = eval(klazz_name).method(converter)
      return method.call(kv.getValue) # apply the converter
    end

    # If the column spec contains CONVERTER information, strip the :CONVERTER
    # suffix from the column pair:
    # 1. return the normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]"
    #    yields "cf" and "qualifier" only
    # 2. register the CONVERTER information under the column spec - "cf:qualifier"
    def set_converter(column)
      family = String.from_java_bytes(column[0])
      parts = org.apache.hadoop.hbase.KeyValue.parseColumn(column[1])
      if parts.length > 1
        @converters["#{family}:#{String.from_java_bytes(parts[0])}"] = String.from_java_bytes(parts[1])
        column[1] = parts[0]
      end
    end
  end
end
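
For context, the converter lookup that convert() performs can be sketched in isolation. Below is a minimal plain-Ruby approximation; the Bytes module here is a stand-in with toy bodies, not the real org.apache.hadoop.hbase.util.Bytes Java class:

  # Stand-in for org.apache.hadoop.hbase.util.Bytes (assumed names, toy bodies).
  module Bytes
    def self.toStringBinary(bytes); bytes.inspect; end
    def self.toInt(bytes); bytes.unpack('N').first; end
  end

  # Resolve a converter spec: either a bare Bytes method name ('toInt')
  # or 'c(ClassName).methodName' naming a custom class, as in convert() above.
  def resolve_converter(spec)
    matches = /c\((.+)\)\.(.+)/.match(spec)
    klazz_name, converter = matches ? [matches[1], matches[2]] : ['Bytes', spec]
    eval(klazz_name).method(converter)
  end

  puts resolve_converter('toInt').call([1024].pack('N'))        # => 1024
  puts resolve_converter('c(Bytes).toInt').call([98].pack('N')) # => 98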


@@ -36,8 +36,23 @@ a dictionary of column(s), timestamp, timerange and versions. Examples:
  hbase> get 't1', 'r1', 'c1', 'c2'
  hbase> get 't1', 'r1', ['c1', 'c2']
Besides the default 'toStringBinary' format, 'get' also supports custom formatting by
column. A user can define a FORMATTER by adding it to the column name in the get
specification. The FORMATTER can be stipulated:
 1. either as an org.apache.hadoop.hbase.util.Bytes method name (e.g., toInt, toString)
 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.
Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
  hbase> get 't1', 'r1', {COLUMN => ['cf:qualifier1:toInt',
    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot specify
a FORMATTER for all columns of a column family.
The same commands can also be run on a reference to a table (obtained via get_table or
create_table). Suppose you had a reference t to table 't1', the corresponding commands
would be:
  hbase> t.get 'r1'
  hbase> t.get 'r1', {TIMERANGE => [ts1, ts2]}
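
To illustrate the custom-class form ('c(MyFormatterClass).format'), here is a hedged sketch of what such a formatter could look like. MyFormatterClass and format are hypothetical names, and the sketch assumes a Ruby string value (in the shell the cell value arrives as a Java byte[]):

  # Hypothetical formatter: any class exposing a class-level method that
  # accepts the cell's value can be referenced as c(ClassName).methodName.
  class MyFormatterClass
    def self.format(bytes)
      '0x' + bytes.unpack('H*').first # render the raw value as hex
    end
  end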


@@ -42,8 +42,10 @@ Some examples:
  hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
  hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
  hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804, 1303668904]}
  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND
    (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
  hbase> scan 't1', {FILTER =>
    org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
@@ -58,13 +60,29 @@ Disabled by default. Example:
  hbase> scan 't1', {RAW => true, VERSIONS => 10}
Besides the default 'toStringBinary' format, 'scan' supports custom formatting
by column. A user can define a FORMATTER by adding it to the column name in
the scan specification. The FORMATTER can be stipulated:
 1. either as an org.apache.hadoop.hbase.util.Bytes method name (e.g., toInt, toString)
 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.
Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
  hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot
specify a FORMATTER for all columns of a column family.
Scan can also be used directly from a table, by first getting a reference to a
table, like so:
  hbase> t = get_table 't'
  hbase> t.scan
Note in the above situation, you can still provide all the filtering, columns,
options, etc. as described above.
EOF
end
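
The column spec itself is parsed family-first: parse_column_name splits off the family, then set_converter peels a trailing converter off the qualifier. A rough plain-Ruby approximation of that handling follows (the real code works on Java byte arrays via KeyValue.parseColumn):

  # Split 'cf:qualifier[:CONVERTER]' and register any converter under
  # "family:qualifier", mirroring set_converter above.
  def parse_column_spec(spec, converters)
    family, rest = spec.split(':', 2)
    qualifier, converter = rest.split(':', 2) if rest
    converters["#{family}:#{qualifier}"] = converter if converter
    [family, qualifier]
  end

  converters = {}
  parse_column_spec('x:c:toInt', converters)
  parse_column_spec('x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt', converters)
  # converters => {"x:c"=>"toInt",
  #                "x:d"=>"c(org.apache.hadoop.hbase.util.Bytes).toInt"}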


@@ -311,6 +311,22 @@ module Hbase
      @test_table._get_internal('1') { |col, val| res[col] = val }
      assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
    end
define_test "get should support COLUMNS with value CONVERTER information" do
@test_table.put(1, "x:c", [1024].pack('N'))
@test_table.put(1, "x:d", [98].pack('N'))
begin
res = @test_table._get_internal('1', ['x:c:toInt'], ['x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt'])
assert_not_nil(res)
assert_kind_of(Hash, res)
assert_not_nil(/value=1024/.match(res['x:c']))
assert_not_nil(/value=98/.match(res['x:d']))
ensure
# clean up newly added columns for this test only.
@test_table.delete(1, "x:c")
@test_table.delete(1, "x:d")
end
end
    #-------------------------------------------------------------------------------
@@ -417,5 +433,22 @@ module Hbase
      res = @test_table._scan_internal { |row, cells| rows[row] = cells }
      assert_equal(rows.keys.size, res)
    end
define_test "scan should support COLUMNS with value CONVERTER information" do
@test_table.put(1, "x:c", [1024].pack('N'))
@test_table.put(1, "x:d", [98].pack('N'))
begin
res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
assert_not_nil(res)
assert_kind_of(Hash, res)
assert_not_nil(/value=1024/.match(res['1']['x:c']))
assert_not_nil(/value=98/.match(res['1']['x:d']))
ensure
# clean up newly added columns for this test only.
@test_table.delete(1, "x:c")
@test_table.delete(1, "x:d")
end
end
  end
end