HBASE-18067 Allow default FORMATTER for shell put/get commands
parent 553d5db355
commit f1544c3466
hbase-shell/src/main/ruby/hbase/table.rb
@@ -347,6 +347,8 @@ EOF
       authorizations = args[AUTHORIZATIONS]
       consistency = args.delete(CONSISTENCY) if args[CONSISTENCY]
       replicaId = args.delete(REGION_REPLICA_ID) if args[REGION_REPLICA_ID]
+      converter = args.delete(FORMATTER) || nil
+      converter_class = args.delete(FORMATTER_CLASS) || 'org.apache.hadoop.hbase.util.Bytes'
       unless args.empty?
         columns = args[COLUMN] || args[COLUMNS]
         if args[VERSIONS]
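Both the get and scan paths pull the two new options out of the args hash before the remaining arguments are parsed under `unless args.empty?`. Ruby's Hash#delete already returns nil for a missing key, so the `|| nil` is purely defensive. A minimal plain-Ruby illustration (the args hash here is hypothetical, not from the patch):

  args = { 'FORMATTER' => 'toString', 'COLUMN' => 'cf:q' }

  converter = args.delete('FORMATTER') || nil
  # => "toString"; nil if the caller passed no FORMATTER

  converter_class = args.delete('FORMATTER_CLASS') || 'org.apache.hadoop.hbase.util.Bytes'
  # => falls back to the Bytes utility class when unspecified

  args
  # => { 'COLUMN' => 'cf:q' } -- the formatter keys are removed, so they do
  #    not interfere with the column/filter argument parsing that follows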
@@ -419,13 +421,13 @@ EOF
       # Print out results. Result can be Cell or RowResult.
       res = {}
       result.listCells.each do |c|
-        family = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getFamilyArray,
-          c.getFamilyOffset, c.getFamilyLength)
-        qualifier = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getQualifierArray,
-          c.getQualifierOffset, c.getQualifierLength)
+        family = convert_bytes_with_position(c.getFamilyArray,
+          c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
+        qualifier = convert_bytes_with_position(c.getQualifierArray,
+          c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
 
         column = "#{family}:#{qualifier}"
-        value = to_string(column, c, maxlength)
+        value = to_string(column, c, maxlength, converter_class, converter)
 
         if block_given?
           yield(column, value)
@@ -544,6 +546,8 @@ EOF
 
       limit = args["LIMIT"] || -1
       maxlength = args.delete("MAXLENGTH") || -1
+      converter = args.delete(FORMATTER) || nil
+      converter_class = args.delete(FORMATTER_CLASS) || 'org.apache.hadoop.hbase.util.Bytes'
       count = 0
       res = {}
 
@@ -555,17 +559,17 @@ EOF
       # Iterate results
       while iter.hasNext
         row = iter.next
-        key = org.apache.hadoop.hbase.util.Bytes::toStringBinary(row.getRow)
+        key = convert_bytes(row.getRow, nil, converter)
         is_stale |= row.isStale
 
         row.listCells.each do |c|
-          family = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getFamilyArray,
-            c.getFamilyOffset, c.getFamilyLength)
-          qualifier = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getQualifierArray,
-            c.getQualifierOffset, c.getQualifierLength)
+          family = convert_bytes_with_position(c.getFamilyArray,
+            c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
+          qualifier = convert_bytes_with_position(c.getQualifierArray,
+            c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
 
           column = "#{family}:#{qualifier}"
-          cell = to_string(column, c, maxlength)
+          cell = to_string(column, c, maxlength, converter_class, converter)
 
           if block_given?
             yield(key, "column=#{column}, #{cell}")
@@ -693,7 +697,7 @@ EOF
 
     # Make a String of the passed kv
     # Intercept cells whose format we know such as the info:regioninfo in hbase:meta
-    def to_string(column, kv, maxlength = -1)
+    def to_string(column, kv, maxlength = -1, converter_class = nil, converter = nil)
       if is_meta_table?
         if column == 'info:regioninfo' or column == 'info:splitA' or column == 'info:splitB'
           hri = org.apache.hadoop.hbase.HRegionInfo.parseFromOrNull(kv.getValueArray,
@@ -715,16 +719,16 @@ EOF
       if kv.isDelete
         val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type::codeToType(kv.getType)}"
       else
-        val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv)}"
+        val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv, converter_class, converter)}"
       end
       (maxlength != -1) ? val[0, maxlength] : val
     end
 
-    def convert(column, kv)
+    def convert(column, kv, converter_class = 'org.apache.hadoop.hbase.util.Bytes', converter = 'toStringBinary')
       # use org.apache.hadoop.hbase.util.Bytes as the default class
-      klazz_name = 'org.apache.hadoop.hbase.util.Bytes'
+      converter_class = 'org.apache.hadoop.hbase.util.Bytes' unless converter_class
       # use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default converter
-      converter = 'toStringBinary'
+      converter = 'toStringBinary' unless converter
       if @converters.has_key?(column)
         # lookup the CONVERTER for certain column - "cf:qualifier"
         matches = /c\((.+)\)\.(.+)/.match(@converters[column])
@@ -737,8 +741,19 @@ EOF
           converter = matches[2]
         end
       end
-      method = eval(klazz_name).method(converter)
-      return method.call(org.apache.hadoop.hbase.CellUtil.cloneValue(kv)) # apply the converter
+      # apply the converter
+      convert_bytes(org.apache.hadoop.hbase.CellUtil.cloneValue(kv), klazz_name, converter)
+    end
+
+    def convert_bytes(bytes, converter_class = nil, converter_method = nil)
+      convert_bytes_with_position(bytes, 0, bytes.length, converter_class, converter_method)
+    end
+
+    def convert_bytes_with_position(bytes, offset, len, converter_class, converter_method)
+      # Avoid nil
+      converter_class = 'org.apache.hadoop.hbase.util.Bytes' unless converter_class
+      converter_method = 'toStringBinary' unless converter_method
+      eval(converter_class).method(converter_method).call(bytes, offset, len)
     end
 
     # If the column spec contains CONVERTER information, strip the :CONVERTER info from the column pair.
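The two helpers above funnel every byte[] through one reflective dispatch: resolve the formatter class by name, fetch a Method object for the named converter, and call it with (bytes, offset, length). A self-contained plain-Ruby sketch of the same idea (the Formatter class and sample bytes are hypothetical; the real code resolves JRuby-wrapped Java classes such as org.apache.hadoop.hbase.util.Bytes):

  # Stand-in for a formatter class; the real default is the Java class
  # org.apache.hadoop.hbase.util.Bytes with methods like toStringBinary.
  class Formatter
    def self.toString(bytes, offset, len)
      bytes[offset, len].pack('C*').force_encoding(Encoding::UTF_8)
    end
  end

  def convert_bytes_with_position(bytes, offset, len, converter_class, converter_method)
    converter_class  ||= 'Formatter'   # avoid nil, as in the patch
    converter_method ||= 'toString'
    # Resolve the class by name, then invoke the named method on the slice.
    Object.const_get(converter_class).method(converter_method).call(bytes, offset, len)
  end

  row = '⻄'.bytes                                                 # [0xE2, 0xBB, 0x84]
  puts convert_bytes_with_position(row, 0, row.length, nil, nil)  # => ⻄

The patch itself uses eval(converter_class) rather than const_get because the configured name is a dotted Java package path, which JRuby resolves via eval.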
hbase-shell/src/main/ruby/hbase_constants.rb
@@ -84,6 +84,8 @@ module HBaseConstants
   SERVER_NAME = 'SERVER_NAME'
   LOCALITY_THRESHOLD = 'LOCALITY_THRESHOLD'
   RESTORE_ACL = 'RESTORE_ACL'
+  FORMATTER = 'FORMATTER'
+  FORMATTER_CLASS = 'FORMATTER_CLASS'
 
   # Load constants from hbase java API
   def self.promote_constants(constants)
hbase-shell/src/main/ruby/shell/commands/get.rb
@@ -53,8 +53,13 @@ Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
   hbase> get 't1', 'r1', {COLUMN => ['cf:qualifier1:toInt',
     'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
 
-Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot specify
-a FORMATTER for all columns of a column family.
+Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a
+formatter for all columns (including all key parts) using the "FORMATTER"
+and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is
+"org.apache.hadoop.hbase.util.Bytes".
+
+  hbase> get 't1', 'r1', {FORMATTER => 'toString'}
+  hbase> get 't1', 'r1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'}
 
 The same commands also can be run on a reference to a table (obtained via get_table or
 create_table). Suppose you had a reference t to table 't1', the corresponding commands
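An illustrative shell session for the new get options (hypothetical table and value; output shapes approximate). By default the value is rendered with Bytes.toStringBinary, which hex-escapes non-printable bytes; FORMATTER => 'toString' decodes them as UTF-8:

  hbase(main):001:0> put 't1', 'r1', 'cf:q', "\xE2\xBB\x84"
  hbase(main):002:0> get 't1', 'r1'
  COLUMN                 CELL
   cf:q                  timestamp=..., value=\xE2\xBB\x84
  hbase(main):003:0> get 't1', 'r1', {FORMATTER => 'toString'}
  COLUMN                 CELL
   cf:q                  timestamp=..., value=⻄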
hbase-shell/src/main/ruby/shell/commands/scan.rb
@@ -83,8 +83,13 @@ Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
   hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
     'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
 
-Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot
-specify a FORMATTER for all columns of a column family.
+Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a
+formatter for all columns (including all key parts) using the "FORMATTER"
+and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is
+"org.apache.hadoop.hbase.util.Bytes".
+
+  hbase> scan 't1', {FORMATTER => 'toString'}
+  hbase> scan 't1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'}
 
 Scan can also be used directly from a table, by first getting a reference to a
 table, like such:
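The per-column syntax still takes precedence over the table-wide default: in convert above, a matching @converters entry overwrites the converter after the FORMATTER defaults are applied, so the two mechanisms compose per column. An illustrative mix (hypothetical table and columns, not from the patch):

  hbase> scan 't1', {COLUMNS => ['cf:counter:toLong', 'cf:name'], FORMATTER => 'toString'}

Here cf:counter cells would be rendered with Bytes.toLong, while cf:name cells and the row keys go through toString.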
hbase-shell/src/test/ruby/hbase/converter_test.rb (new file)
@@ -0,0 +1,157 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'hbase_constants'
+require 'shell'
+
+include HBaseConstants
+
+module Hbase
+  class ConverterTest < Test::Unit::TestCase
+    include TestHelpers
+
+    non_ascii_text = '⻆⻇'
+    non_ascii_row = '⻄'
+    non_ascii_family = 'ㄹ'
+    non_ascii_qualifier = '⻅'
+    non_ascii_column = "#{non_ascii_family}:#{non_ascii_qualifier}"
+    hex_text = '\xE2\xBB\x86\xE2\xBB\x87'
+    hex_row = '\xE2\xBB\x84'
+    hex_family = '\xE3\x84\xB9'
+    hex_qualifier = '\xE2\xBB\x85'
+    hex_column = "#{hex_family}:#{hex_qualifier}"
+
+    def setup
+      setup_hbase
+    end
+
+    def teardown
+      shutdown
+    end
+
+    define_test 'Test scan for non-ascii data' do
+      table_name = 'scan-test'
+      create_test_table(table_name)
+      # Write a record
+      command(:put, table_name, 'r1', 'x:a', non_ascii_text)
+      output = capture_stdout{ command(:scan, table_name) }
+      # Encoded value not there by default
+      assert(!output.include?(non_ascii_text))
+      # Hex-encoded value is there by default (manually converted)
+      assert(output.include?(hex_text))
+
+      # Use the formatter method
+      output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString'}) }
+      # Should have Chinese characters
+      assert(output.include?(non_ascii_text))
+      # Should not have hex-encoded string
+      assert(!output.include?(hex_text))
+
+      # Use the formatter method + class
+      output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) }
+      # Should have Chinese characters
+      assert(output.include?(non_ascii_text))
+      # Should not have hex-encoded string
+      assert(!output.include?(hex_text))
+
+      command(:disable, table_name)
+      command(:drop, table_name)
+      command(:create, table_name, non_ascii_family)
+
+      command(:put, table_name, non_ascii_row, non_ascii_column, non_ascii_text)
+      output = capture_stdout{ command(:scan, table_name) }
+      # By default, get hex-encoded data
+      assert(!output.include?(non_ascii_text))
+      assert(!output.include?(non_ascii_row))
+      assert(!output.include?(non_ascii_column))
+      assert(output.include?(hex_text))
+      assert(output.include?(hex_row))
+      assert(output.include?(hex_column))
+
+      # Use the formatter method
+      output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString'}) }
+      # With the formatter, should get decoded data
+      assert(output.include?(non_ascii_text))
+      assert(output.include?(non_ascii_row))
+      assert(output.include?(non_ascii_column))
+      assert(!output.include?(hex_text))
+      assert(!output.include?(hex_row))
+      assert(!output.include?(hex_column))
+
+      # Use the formatter method + class
+      output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) }
+      # With the formatter, should get decoded data
+      assert(output.include?(non_ascii_text))
+      assert(output.include?(non_ascii_row))
+      assert(output.include?(non_ascii_column))
+      assert(!output.include?(hex_text))
+      assert(!output.include?(hex_row))
+      assert(!output.include?(hex_column))
+    end
+
+    define_test 'Test get for non-ascii data' do
+      table_name = 'get-test'
+      create_test_table(table_name)
+      # Write a record
+      command(:put, table_name, 'r1', 'x:a', non_ascii_text)
+      output = capture_stdout{ command(:get, table_name, 'r1') }
+      # Encoded value not there by default
+      assert(!output.include?(non_ascii_text))
+      # Hex-encoded value is there by default (manually converted)
+      assert(output.include?(hex_text))
+
+      # Use the formatter method
+      output = capture_stdout{ command(:get, table_name, 'r1', {'FORMATTER'=>'toString'}) }
+      # Should have Chinese characters
+      assert(output.include?(non_ascii_text))
+      # Should not have hex-encoded string
+      assert(!output.include?(hex_text))
+
+      # Use the formatter method + class
+      output = capture_stdout{ command(:get, table_name, 'r1', {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) }
+      # Should have Chinese characters
+      assert(output.include?(non_ascii_text))
+      # Should not have hex-encoded string
+      assert(!output.include?(hex_text))
+
+      command(:disable, table_name)
+      command(:drop, table_name)
+      command(:create, table_name, non_ascii_family)
+
+      # Use no formatter (expect hex)
+      command(:put, table_name, non_ascii_row, non_ascii_column, non_ascii_text)
+      output = capture_stdout{ command(:get, table_name, non_ascii_row) }
+      assert(!output.include?(non_ascii_text))
+      assert(!output.include?(non_ascii_column))
+      assert(output.include?(hex_text))
+      assert(output.include?(hex_column))
+
+      # Use the formatter method
+      output = capture_stdout{ command(:get, table_name, non_ascii_row, {'FORMATTER'=>'toString'}) }
+      assert(output.include?(non_ascii_text))
+      assert(output.include?(non_ascii_column))
+      assert(!output.include?(hex_text))
+      assert(!output.include?(hex_column))
+
+      # Use the formatter method + class
+      output = capture_stdout{ command(:get, table_name, non_ascii_row, {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) }
+      assert(output.include?(non_ascii_text))
+      assert(output.include?(non_ascii_column))
+      assert(!output.include?(hex_text))
+      assert(!output.include?(hex_column))
+    end
+  end
+end