HBASE-11676 Scan FORMATTER is not applied for columns using non-printable name in shell (#2161)
- In HBase::Table, the instance variable @converters is used to map column names to converters. This patch fixes how HBase::Table#_get_internal and HBase::Table#_scan_internal generate the column name key used to access @converters.
- Refactor parsing of family:qualifier:converter specifications so that the code is more readable and reusable. As part of this change, I added two private methods and marked HBase::Table#set_converter as deprecated for removal in HBase 4.0.0.
- Add unit tests for the fixed bug.

Signed-off-by: stack <stack@apache.org>
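For context, a rough sketch of the shell usage this fixes. The table and row names here are hypothetical and the output is approximate; the column spec syntax and the non-printable "\x11" qualifier mirror the unit tests changed below.

  # Hypothetical table 't1' with a column whose qualifier is the single non-printable byte \x11.
  # [921].pack('N') stores the 4-byte big-endian value 921 (\x00\x00\x03\x99).
  put 't1', 'r1', "x:\x11", [921].pack('N')

  # Converter follows the FAMILY:QUALIFIER[:CONVERTER] spec. Before this patch the converter was
  # registered under the raw column name but looked up under the escaped one, so it was never
  # applied to non-printable qualifiers; after the patch the value prints as 921, roughly:
  scan 't1', { COLUMNS => ["x:\x11:toInt"] }
  #  r1   column=x:\x11, timestamp=..., value=921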
parent 4471a644f6
commit 7974a1e9bf
@@ -449,18 +449,23 @@ EOF
       # Print out results. Result can be Cell or RowResult.
       res = {}
       result.listCells.each do |c|
-        family = convert_bytes_with_position(c.getFamilyArray,
-          c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
-        qualifier = convert_bytes_with_position(c.getQualifierArray,
-          c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
+        # Get the family and qualifier of the cell without escaping non-printable characters. It is crucial
+        # that column is constructed in this consistent way so that it can be used as a key.
+        family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength)
+        qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength)
+        column = "#{family_bytes}:#{qualifier_bytes}"

-        column = "#{family}:#{qualifier}"
         value = to_string(column, c, maxlength, converter_class, converter)

+        # Use the FORMATTER to determine how column is printed
+        family = convert_bytes(family_bytes, converter_class, converter)
+        qualifier = convert_bytes(qualifier_bytes, converter_class, converter)
+        formatted_column = "#{family}:#{qualifier}"
+
         if block_given?
-          yield(column, value)
+          yield(formatted_column, value)
         else
-          res[column] = value
+          res[formatted_column] = value
         end
       end
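The key point in the hunk above is that two different strings are now in play: the raw, unescaped column name used as the lookup key for @converters, and the FORMATTER-escaped name shown to callers. A minimal illustration, using the values from the unit tests below:

  lookup_key  = "x:\x11"    # raw bytes: the qualifier is the single byte 0x11
  display_key = 'x:\x11'    # escaped form produced by the FORMATTER: literal backslash-x-1-1
  lookup_key == display_key # => false; before this patch the escaped form was used for the
                            # @converters lookup, so "x:\x11:toInt" style converters never matched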
@@ -604,19 +609,24 @@ EOF
           is_stale |= row.isStale

           row.listCells.each do |c|
-            family = convert_bytes_with_position(c.getFamilyArray,
-              c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
-            qualifier = convert_bytes_with_position(c.getQualifierArray,
-              c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
+            # Get the family and qualifier of the cell without escaping non-printable characters. It is crucial
+            # that column is constructed in this consistent way so that it can be used as a key.
+            family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength)
+            qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength)
+            column = "#{family_bytes}:#{qualifier_bytes}"

-            column = "#{family}:#{qualifier}"
             cell = to_string(column, c, maxlength, converter_class, converter)

+            # Use the FORMATTER to determine how column is printed
+            family = convert_bytes(family_bytes, converter_class, converter)
+            qualifier = convert_bytes(qualifier_bytes, converter_class, converter)
+            formatted_column = "#{family}:#{qualifier}"
+
             if block_given?
-              yield(key, "column=#{column}, #{cell}")
+              yield(key, "column=#{formatted_column}, #{cell}")
             else
               res[key] ||= {}
-              res[key][column] = cell
+              res[key][formatted_column] = cell
             end
           end
           # One more row processed
@@ -729,11 +739,15 @@ EOF
         org.apache.hadoop.hbase.TableName::META_TABLE_NAME.equals(@table.getName)
       end

-      # Returns family and (when has it) qualifier for a column name
+      # Given a column specification in the format FAMILY[:QUALIFIER[:CONVERTER]]
+      # 1. Save the converter for the given column
+      # 2. Return a 2-element Array with [family, qualifier or nil], discarding the converter if provided
+      #
+      # @param [String] column specification
       def parse_column_name(column)
-        split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes)
-        set_converter(split) if split.length > 1
-        [split[0], split.length > 1 ? split[1] : nil]
+        spec = parse_column_format_spec(column)
+        set_column_converter(spec.family, spec.qualifier, spec.converter) unless spec.converter.nil?
+        [spec.family, spec.qualifier]
       end

       def toISO8601(millis)
@@ -806,9 +820,46 @@ EOF
         eval(converter_class).method(converter_method).call(bytes, offset, len)
       end

+      # store the information designating what part of a column should be printed, and how
+      ColumnFormatSpec = Struct.new(:family, :qualifier, :converter)
+
+      ##
+      # Parse the column specification for formatting used by shell commands like :scan
+      #
+      # Strings should be structured as follows:
+      #   FAMILY:QUALIFIER[:CONVERTER]
+      # Where:
+      #   - FAMILY is the column family
+      #   - QUALIFIER is the column qualifier. Non-printable characters should be left AS-IS and should NOT BE escaped.
+      #   - CONVERTER is optional and is the name of a converter (like toLong) to apply
+      #
+      # @param [String] column
+      # @return [ColumnFormatSpec] family, qualifier, and converter as Java bytes
+      private def parse_column_format_spec(column)
+        split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes)
+        family = split[0]
+        qualifier = nil
+        converter = nil
+        if split.length > 1
+          parts = org.apache.hadoop.hbase.CellUtil.parseColumn(split[1])
+          qualifier = parts[0]
+          if parts.length > 1
+            converter = parts[1]
+          end
+        end
+
+        ColumnFormatSpec.new(family, qualifier, converter)
+      end
+
+      private def set_column_converter(family, qualifier, converter)
+        @converters["#{String.from_java_bytes(family)}:#{String.from_java_bytes(qualifier)}"] = String.from_java_bytes(converter)
+      end
+
       # if the column spec contains CONVERTER information, to get rid of :CONVERTER info from column pair.
       # 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only
       # 2. register the CONVERTER information based on column spec - "cf:qualifier"
       #
+      # Deprecated for removal in 4.0.0
       def set_converter(column)
         family = String.from_java_bytes(column[0])
         parts = org.apache.hadoop.hbase.CellUtil.parseColumn(column[1])
@@ -817,6 +868,8 @@ EOF
           column[1] = parts[0]
         end
       end
+      extend Gem::Deprecate
+      deprecate :set_converter, "4.0.0", nil, nil

       #----------------------------------------------------------------------------------------------
       # Get the split points for the table
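For readers following the refactor, a rough walkthrough (illustration only, not part of the diff) of how the new helpers cooperate for a non-printable column spec like the one exercised by the tests below:

  family, qualifier = parse_column_name("x:\x11:toInt")
  # parse_column_format_spec splits the spec with CellUtil.parseColumn:
  #   spec.family    => "x"     (as Java bytes)
  #   spec.qualifier => "\x11"  (as Java bytes)
  #   spec.converter => "toInt" (as Java bytes)
  # set_column_converter registers the converter under the raw, unescaped column name:
  #   @converters["x:\x11"] = "toInt"
  # parse_column_name returns [family, qualifier] and drops the converter, so callers build
  # their Get/Scan objects as before, while _get_internal/_scan_internal now find the
  # converter because they key their lookup on the same raw bytes.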
@@ -239,6 +239,7 @@ module Hbase
       @test_ts = 12345678
       @test_table.put(1, "x:a", 1)
       @test_table.put(1, "x:b", 2, @test_ts)
+      @test_table.put(1, "x:\x11", [921].pack("N"))

       @test_table.put(2, "x:a", 11)
       @test_table.put(2, "x:b", 12, @test_ts)
@@ -333,9 +334,10 @@ module Hbase
     end

     define_test "get should work with hash columns spec and an array of strings COLUMN parameter" do
-      res = @test_table._get_internal('1', COLUMN => [ 'x:a', 'x:b' ])
+      res = @test_table._get_internal('1', COLUMN => [ "x:\x11", 'x:a', 'x:b' ])
       assert_not_nil(res)
       assert_kind_of(Hash, res)
+      assert_not_nil(res['x:\x11'])
       assert_not_nil(res['x:a'])
       assert_not_nil(res['x:b'])
     end
@@ -356,6 +358,18 @@ module Hbase
       assert_not_nil(res['x:b'])
     end

+    define_test "get should work with non-printable columns and values" do
+      res = @test_table._get_internal('1', COLUMNS => [ "x:\x11" ])
+      assert_not_nil(res)
+      assert_kind_of(Hash, res)
+      assert_match(/value=\\x00\\x00\\x03\\x99/, res[ 'x:\x11' ])
+
+      res = @test_table._get_internal('1', COLUMNS => [ "x:\x11:toInt" ])
+      assert_not_nil(res)
+      assert_kind_of(Hash, res)
+      assert_match(/value=921/, res[ 'x:\x11' ])
+    end
+
     define_test "get should work with hash columns spec and TIMESTAMP only" do
       res = @test_table._get_internal('1', TIMESTAMP => @test_ts)
       assert_not_nil(res)
@@ -412,10 +426,10 @@ module Hbase
       assert_not_nil(res['x:b'])
     end

-    define_test "get with a block should yield (column, value) pairs" do
+    define_test "get with a block should yield (formatted column, value) pairs" do
       res = {}
       @test_table._get_internal('1') { |col, val| res[col] = val }
-      assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
+      assert_equal([ 'x:\x11', 'x:a', 'x:b' ], res.keys.sort)
     end

     define_test "get should support COLUMNS with value CONVERTER information" do
@@ -709,12 +723,14 @@ module Hbase
     define_test "scan should support COLUMNS with value CONVERTER information" do
       @test_table.put(1, "x:c", [1024].pack('N'))
       @test_table.put(1, "x:d", [98].pack('N'))
+      @test_table.put(1, "x:\x11", [712].pack('N'))
       begin
-        res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
+        res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt', "x:\x11:toInt"]
         assert_not_nil(res)
         assert_kind_of(Hash, res)
-        assert_not_nil(/value=1024/.match(res['1']['x:c']))
-        assert_not_nil(/value=98/.match(res['1']['x:d']))
+        assert_match(/value=1024/, res['1']['x:c'])
+        assert_match(/value=98/, res['1']['x:d'])
+        assert_match(/value=712/, res['1']['x:\x11'])
       ensure
         # clean up newly added columns for this test only.
         @test_table.deleteall(1, 'x:c')