HBASE-6592 [shell] Add means of custom formatting output by column
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1387369 13f79535-47bb-0310-9956-ffa450edef68
commit 4bd1ffbd1e
parent 65a2962cdc
In hbase/table.rb:

@@ -113,6 +113,7 @@ EOF
       @table = org.apache.hadoop.hbase.client.HTable.new(configuration, table_name)
       @name = table_name
       @shell = shell
+      @converters = Hash.new()
     end

     # Note the below methods are prefixed with '_' to hide them from the average user, as
@@ -187,7 +188,8 @@ EOF
     def _get_internal(row, *args)
       get = org.apache.hadoop.hbase.client.Get.new(row.to_s.to_java_bytes)
       maxlength = -1
+      @converters.clear()

       # Normalize args
       args = args.first if args.first.kind_of?(Hash)
       if args.kind_of?(String) || args.kind_of?(Array)
@@ -299,6 +301,7 @@ EOF

       limit = args.delete("LIMIT") || -1
       maxlength = args.delete("MAXLENGTH") || -1
+      @converters.clear()

       if args.any?
         filter = args["FILTER"]
@@ -450,6 +453,7 @@ EOF
     # Returns family and (when has it) qualifier for a column name
     def parse_column_name(column)
       split = org.apache.hadoop.hbase.KeyValue.parseColumn(column.to_java_bytes)
+      set_converter(split) if split.length > 1
       return split[0], (split.length > 1) ? split[1] : nil
     end

@@ -474,9 +478,42 @@ EOF
       if kv.isDelete
         val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type::codeToType(kv.getType)}"
       else
-        val = "timestamp=#{kv.getTimestamp}, value=#{org.apache.hadoop.hbase.util.Bytes::toStringBinary(kv.getValue)}"
+        val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv)}"
       end
       (maxlength != -1) ? val[0, maxlength] : val
     end

+    def convert(column, kv)
+      # use org.apache.hadoop.hbase.util.Bytes as the default class
+      klazz_name = 'org.apache.hadoop.hbase.util.Bytes'
+      # use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default converter
+      converter = 'toStringBinary'
+      if @converters.has_key?(column)
+        # look up the CONVERTER registered for this column - "cf:qualifier"
+        matches = /c\((.+)\)\.(.+)/.match(@converters[column])
+        if matches.nil?
+          # spec does not match the 'c(className).methodName' pattern;
+          # keep the default klazz_name and treat the spec as a method name
+          converter = @converters[column]
+        else
+          klazz_name = matches[1]
+          converter = matches[2]
+        end
+      end
+      method = eval(klazz_name).method(converter)
+      return method.call(kv.getValue) # apply the converter
+    end
+
+    # If the column spec carries CONVERTER information, strip the :CONVERTER part from the column pair:
+    # 1. return the normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" becomes "cf" and "qualifier" only
+    # 2. register the CONVERTER under the column spec - "cf:qualifier"
+    def set_converter(column)
+      family = String.from_java_bytes(column[0])
+      parts = org.apache.hadoop.hbase.KeyValue.parseColumn(column[1])
+      if parts.length > 1
+        @converters["#{family}:#{String.from_java_bytes(parts[0])}"] = String.from_java_bytes(parts[1])
+        column[1] = parts[0]
+      end
+    end
   end
 end
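Taken together, parse_column_name/set_converter register any trailing :CONVERTER spec under "cf:qualifier", and convert resolves that spec either as a bare Bytes method name or as 'c(className).methodName', falling back to Bytes.toStringBinary. A minimal standalone sketch of that resolution step (plain Ruby; the Converters module name and the sample specs are illustrative only, not part of the patch):

  # Resolve a converter spec to [class_name, method_name].
  module Converters
    DEFAULT_KLAZZ = 'org.apache.hadoop.hbase.util.Bytes'

    def self.resolve(spec)
      matches = /c\((.+)\)\.(.+)/.match(spec)
      return [DEFAULT_KLAZZ, spec] if matches.nil?  # bare method name, e.g. 'toInt'
      [matches[1], matches[2]]                      # explicit 'c(className).methodName'
    end
  end

  p Converters.resolve('toInt')
  # => ["org.apache.hadoop.hbase.util.Bytes", "toInt"]
  p Converters.resolve('c(MyFormatterClass).format')
  # => ["MyFormatterClass", "format"]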
In shell/commands/get.rb (help text):

@@ -36,8 +36,23 @@ a dictionary of column(s), timestamp, timerange and versions. Examples:
   hbase> get 't1', 'r1', 'c1', 'c2'
   hbase> get 't1', 'r1', ['c1', 'c2']

+Besides the default 'toStringBinary' format, 'get' also supports custom formatting by
+column. A user can define a FORMATTER by adding it to the column name in the get
+specification. The FORMATTER can be stipulated:
+
+ 1. either as an org.apache.hadoop.hbase.util.Bytes method name (e.g., toInt, toString)
+ 2. or as a custom class followed by a method name: e.g. 'c(MyFormatterClass).format'.
+
+Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
+  hbase> get 't1', 'r1', {COLUMN => ['cf:qualifier1:toInt',
+    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
+
+Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot specify
+a FORMATTER for all columns of a column family.
+
 The same commands also can be run on a reference to a table (obtained via get_table or
-create_table). Suppose you had a reference t to table 't1', the corresponding commands would be:
+create_table). Suppose you had a reference t to table 't1', the corresponding commands
+would be:

   hbase> t.get 'r1'
   hbase> t.get 'r1', {TIMERANGE => [ts1, ts2]}
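For the 'c(MyFormatterClass).format' form, convert eventually calls the named class-level method with the raw cell value (a Java byte[] from kv.getValue). A hypothetical JRuby formatter, sketched here only to show the expected shape (the hex rendering is an assumption, not part of the patch), could look like:

  # Hypothetical formatter; it must be loadable from the shell session so that
  # eval('MyFormatterClass') can find it.
  class MyFormatterClass
    def self.format(bytes)
      # bytes is a Java byte[]; render it as a hex string, e.g. "0x00000400" for 1024
      "0x" + bytes.to_a.map { |b| "%02x" % (b & 0xff) }.join
    end
  end

Once defined (e.g. required before starting the shell), it could be referenced as:

  hbase> get 't1', 'r1', {COLUMN => 'cf:qualifier1:c(MyFormatterClass).format'}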
In shell/commands/scan.rb (help text):

@@ -42,8 +42,10 @@ Some examples:
   hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
   hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
   hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804, 1303668904]}
-  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
-  hbase> scan 't1', {FILTER => org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
+  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND
+    (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
+  hbase> scan 't1', {FILTER =>
+    org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}

 For experts, there is an additional option -- CACHE_BLOCKS -- which
 switches block caching for the scanner on (true) or off (false). By
@@ -58,13 +60,29 @@ Disabled by default. Example:

   hbase> scan 't1', {RAW => true, VERSIONS => 10}

-Scan can also be used directly from a table, by first getting a reference to a table, like such:
+Besides the default 'toStringBinary' format, 'scan' supports custom formatting
+by column. A user can define a FORMATTER by adding it to the column name in
+the scan specification. The FORMATTER can be stipulated:
+
+ 1. either as an org.apache.hadoop.hbase.util.Bytes method name (e.g., toInt, toString)
+ 2. or as a custom class followed by a method name: e.g. 'c(MyFormatterClass).format'.
+
+Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
+  hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
+    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
+
+Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot
+specify a FORMATTER for all columns of a column family.
+
+Scan can also be used directly from a table, by first getting a reference to a
+table, like such:

   hbase> t = get_table 't'
   hbase> t.scan

-Note in the above situation, you can still provide all the filtering, columns, options, etc as
-described above.
+Note in the above situation, you can still provide all the filtering, columns,
+options, etc as described above.

 EOF
         end
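Because the FORMATTER travels with the column spec, it should apply the same way when scanning through a table reference; a hypothetical session (the column name is a placeholder):

  hbase> t = get_table 't1'
  hbase> t.scan COLUMNS => ['cf:qualifier1:toInt']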
In hbase/table_test.rb (tests):

@@ -311,6 +311,22 @@ module Hbase
       @test_table._get_internal('1') { |col, val| res[col] = val }
       assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
     end

+    define_test "get should support COLUMNS with value CONVERTER information" do
+      @test_table.put(1, "x:c", [1024].pack('N'))
+      @test_table.put(1, "x:d", [98].pack('N'))
+      begin
+        res = @test_table._get_internal('1', ['x:c:toInt'], ['x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt'])
+        assert_not_nil(res)
+        assert_kind_of(Hash, res)
+        assert_not_nil(/value=1024/.match(res['x:c']))
+        assert_not_nil(/value=98/.match(res['x:d']))
+      ensure
+        # clean up newly added columns for this test only.
+        @test_table.delete(1, "x:c")
+        @test_table.delete(1, "x:d")
+      end
+    end
+
     #-------------------------------------------------------------------------------
@@ -417,5 +433,22 @@ module Hbase
       res = @test_table._scan_internal { |row, cells| rows[row] = cells }
       assert_equal(rows.keys.size, res)
     end
+
+    define_test "scan should support COLUMNS with value CONVERTER information" do
+      @test_table.put(1, "x:c", [1024].pack('N'))
+      @test_table.put(1, "x:d", [98].pack('N'))
+      begin
+        res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
+        assert_not_nil(res)
+        assert_kind_of(Hash, res)
+        assert_not_nil(/value=1024/.match(res['1']['x:c']))
+        assert_not_nil(/value=98/.match(res['1']['x:d']))
+      ensure
+        # clean up newly added columns for this test only.
+        @test_table.delete(1, "x:c")
+        @test_table.delete(1, "x:d")
+      end
+    end
   end
 end
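A note on the test data: [1024].pack('N') writes a 32-bit big-endian integer, which is the byte layout org.apache.hadoop.hbase.util.Bytes.toInt reads back, so the toInt formatter recovers 1024 and 98. A quick plain-Ruby check of that encoding:

  bytes = [1024].pack('N')
  p bytes.unpack('C*')       # => [0, 0, 4, 0]  (big-endian 32-bit encoding of 1024)
  p bytes.unpack('N').first  # => 1024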