HBASE-11676 Scan FORMATTER is not applied for columns using non-printable name in shell (#2161)
- In HBase::Table, the instance variable @converters is used to map column names to converters. This patch fixes how HBase::Table#_get_internal and HBase::Table#_scan_internal generate the column name key used to access @converters.
- Refactor parsing of family:qualifier:converter specifications so that the code is more readable and reusable. As part of this change, I added two private methods and marked HBase::Table#set_converter as deprecated for removal in HBase 4.0.0.
- Add unit tests for the fixed bug.

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
parent
6ded070f82
commit
568d73d7db
|
@ -449,18 +449,23 @@ EOF
|
||||||
# Print out results. Result can be Cell or RowResult.
|
# Print out results. Result can be Cell or RowResult.
|
||||||
res = {}
|
res = {}
|
||||||
result.listCells.each do |c|
|
result.listCells.each do |c|
|
||||||
family = convert_bytes_with_position(c.getFamilyArray,
|
# Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that
|
||||||
c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
|
# column is constructed in this consistent way so that it can be used as a key.
|
||||||
qualifier = convert_bytes_with_position(c.getQualifierArray,
|
family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength)
|
||||||
c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
|
qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength)
|
||||||
|
column = "#{family_bytes}:#{qualifier_bytes}"
|
||||||
|
|
||||||
column = "#{family}:#{qualifier}"
|
|
||||||
value = to_string(column, c, maxlength, converter_class, converter)
|
value = to_string(column, c, maxlength, converter_class, converter)
|
||||||
|
|
||||||
|
# Use the FORMATTER to determine how column is printed
|
||||||
|
family = convert_bytes(family_bytes, converter_class, converter)
|
||||||
|
qualifier = convert_bytes(qualifier_bytes, converter_class, converter)
|
||||||
|
formatted_column = "#{family}:#{qualifier}"
|
||||||
|
|
||||||
if block_given?
|
if block_given?
|
||||||
yield(column, value)
|
yield(formatted_column, value)
|
||||||
else
|
else
|
||||||
res[column] = value
|
res[formatted_column] = value
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -604,19 +609,24 @@ EOF
|
||||||
is_stale |= row.isStale
|
is_stale |= row.isStale
|
||||||
|
|
||||||
row.listCells.each do |c|
|
row.listCells.each do |c|
|
||||||
family = convert_bytes_with_position(c.getFamilyArray,
|
# Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that
|
||||||
c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
|
# column is constructed in this consistent way so that it can be used as a key.
|
||||||
qualifier = convert_bytes_with_position(c.getQualifierArray,
|
family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength)
|
||||||
c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
|
qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength)
|
||||||
|
column = "#{family_bytes}:#{qualifier_bytes}"
|
||||||
|
|
||||||
column = "#{family}:#{qualifier}"
|
|
||||||
cell = to_string(column, c, maxlength, converter_class, converter)
|
cell = to_string(column, c, maxlength, converter_class, converter)
|
||||||
|
|
||||||
|
# Use the FORMATTER to determine how column is printed
|
||||||
|
family = convert_bytes(family_bytes, converter_class, converter)
|
||||||
|
qualifier = convert_bytes(qualifier_bytes, converter_class, converter)
|
||||||
|
formatted_column = "#{family}:#{qualifier}"
|
||||||
|
|
||||||
if block_given?
|
if block_given?
|
||||||
yield(key, "column=#{column}, #{cell}")
|
yield(key, "column=#{formatted_column}, #{cell}")
|
||||||
else
|
else
|
||||||
res[key] ||= {}
|
res[key] ||= {}
|
||||||
res[key][column] = cell
|
res[key][formatted_column] = cell
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
# One more row processed
|
# One more row processed
|
||||||
|
@ -729,11 +739,15 @@ EOF
|
||||||
org.apache.hadoop.hbase.TableName::META_TABLE_NAME.equals(@table.getName)
|
org.apache.hadoop.hbase.TableName::META_TABLE_NAME.equals(@table.getName)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns family and (when has it) qualifier for a column name
|
# Given a column specification in the format FAMILY[:QUALIFIER[:CONVERTER]]
|
||||||
|
# 1. Save the converter for the given column
|
||||||
|
# 2. Return a 2-element Array with [family, qualifier or nil], discarding the converter if provided
|
||||||
|
#
|
||||||
|
# @param [String] column specification
|
||||||
def parse_column_name(column)
|
def parse_column_name(column)
|
||||||
split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes)
|
spec = parse_column_format_spec(column)
|
||||||
set_converter(split) if split.length > 1
|
set_column_converter(spec.family, spec.qualifier, spec.converter) unless spec.converter.nil?
|
||||||
[split[0], split.length > 1 ? split[1] : nil]
|
[spec.family, spec.qualifier]
|
||||||
end
|
end
|
||||||
|
|
||||||
def toISO8601(millis)
|
def toISO8601(millis)
|
||||||
|
@ -806,9 +820,46 @@ EOF
|
||||||
eval(converter_class).method(converter_method).call(bytes, offset, len)
|
eval(converter_class).method(converter_method).call(bytes, offset, len)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# store the information designating what part of a column should be printed, and how
|
||||||
|
ColumnFormatSpec = Struct.new(:family, :qualifier, :converter)
|
||||||
|
|
||||||
|
##
|
||||||
|
# Parse the column specification for formatting used by shell commands like :scan
|
||||||
|
#
|
||||||
|
# Strings should be structured as follows:
|
||||||
|
# FAMILY[:QUALIFIER[:CONVERTER]]
|
||||||
|
# Where:
|
||||||
|
# - FAMILY is the column family
|
||||||
|
# - QUALIFIER is the column qualifier. Non-printable characters should be left AS-IS and should NOT BE escaped.
|
||||||
|
# - CONVERTER is optional and is the name of a converter (like toLong) to apply
|
||||||
|
#
|
||||||
|
# @param [String] column
|
||||||
|
# @return [ColumnFormatSpec] family, qualifier, and converter as Java bytes
|
||||||
|
private def parse_column_format_spec(column)
|
||||||
|
split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes)
|
||||||
|
family = split[0]
|
||||||
|
qualifier = nil
|
||||||
|
converter = nil
|
||||||
|
if split.length > 1
|
||||||
|
parts = org.apache.hadoop.hbase.CellUtil.parseColumn(split[1])
|
||||||
|
qualifier = parts[0]
|
||||||
|
if parts.length > 1
|
||||||
|
converter = parts[1]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
ColumnFormatSpec.new(family, qualifier, converter)
|
||||||
|
end
|
||||||
|
|
||||||
|
private def set_column_converter(family, qualifier, converter)
|
||||||
|
@converters["#{String.from_java_bytes(family)}:#{String.from_java_bytes(qualifier)}"] = String.from_java_bytes(converter)
|
||||||
|
end
|
||||||
|
|
||||||
# If the column spec contains CONVERTER information, strip the :CONVERTER info from the column pair:
|
# If the column spec contains CONVERTER information, strip the :CONVERTER info from the column pair:
|
||||||
# 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only
|
# 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only
|
||||||
# 2. register the CONVERTER information based on column spec - "cf:qualifier"
|
# 2. register the CONVERTER information based on column spec - "cf:qualifier"
|
||||||
|
#
|
||||||
|
# Deprecated for removal in 4.0.0
|
||||||
def set_converter(column)
|
def set_converter(column)
|
||||||
family = String.from_java_bytes(column[0])
|
family = String.from_java_bytes(column[0])
|
||||||
parts = org.apache.hadoop.hbase.CellUtil.parseColumn(column[1])
|
parts = org.apache.hadoop.hbase.CellUtil.parseColumn(column[1])
|
||||||
|
@ -817,6 +868,8 @@ EOF
|
||||||
column[1] = parts[0]
|
column[1] = parts[0]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
extend Gem::Deprecate
|
||||||
|
deprecate :set_converter, "4.0.0", nil, nil
|
||||||
|
|
||||||
#----------------------------------------------------------------------------------------------
|
#----------------------------------------------------------------------------------------------
|
||||||
# Get the split points for the table
|
# Get the split points for the table
|
||||||
|
|
|
@ -237,6 +237,7 @@ module Hbase
|
||||||
@test_ts = 12345678
|
@test_ts = 12345678
|
||||||
@test_table.put(1, "x:a", 1)
|
@test_table.put(1, "x:a", 1)
|
||||||
@test_table.put(1, "x:b", 2, @test_ts)
|
@test_table.put(1, "x:b", 2, @test_ts)
|
||||||
|
@test_table.put(1, "x:\x11", [921].pack("N"))
|
||||||
|
|
||||||
@test_table.put(2, "x:a", 11)
|
@test_table.put(2, "x:a", 11)
|
||||||
@test_table.put(2, "x:b", 12, @test_ts)
|
@test_table.put(2, "x:b", 12, @test_ts)
|
||||||
|
@ -331,9 +332,10 @@ module Hbase
|
||||||
end
|
end
|
||||||
|
|
||||||
define_test "get should work with hash columns spec and an array of strings COLUMN parameter" do
|
define_test "get should work with hash columns spec and an array of strings COLUMN parameter" do
|
||||||
res = @test_table._get_internal('1', COLUMN => [ 'x:a', 'x:b' ])
|
res = @test_table._get_internal('1', COLUMN => [ "x:\x11", 'x:a', 'x:b' ])
|
||||||
assert_not_nil(res)
|
assert_not_nil(res)
|
||||||
assert_kind_of(Hash, res)
|
assert_kind_of(Hash, res)
|
||||||
|
assert_not_nil(res['x:\x11'])
|
||||||
assert_not_nil(res['x:a'])
|
assert_not_nil(res['x:a'])
|
||||||
assert_not_nil(res['x:b'])
|
assert_not_nil(res['x:b'])
|
||||||
end
|
end
|
||||||
|
@ -354,6 +356,18 @@ module Hbase
|
||||||
assert_not_nil(res['x:b'])
|
assert_not_nil(res['x:b'])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
define_test "get should work with non-printable columns and values" do
|
||||||
|
res = @test_table._get_internal('1', COLUMNS => [ "x:\x11" ])
|
||||||
|
assert_not_nil(res)
|
||||||
|
assert_kind_of(Hash, res)
|
||||||
|
assert_match(/value=\\x00\\x00\\x03\\x99/, res[ 'x:\x11' ])
|
||||||
|
|
||||||
|
res = @test_table._get_internal('1', COLUMNS => [ "x:\x11:toInt" ])
|
||||||
|
assert_not_nil(res)
|
||||||
|
assert_kind_of(Hash, res)
|
||||||
|
assert_match(/value=921/, res[ 'x:\x11' ])
|
||||||
|
end
|
||||||
|
|
||||||
define_test "get should work with hash columns spec and TIMESTAMP only" do
|
define_test "get should work with hash columns spec and TIMESTAMP only" do
|
||||||
res = @test_table._get_internal('1', TIMESTAMP => @test_ts)
|
res = @test_table._get_internal('1', TIMESTAMP => @test_ts)
|
||||||
assert_not_nil(res)
|
assert_not_nil(res)
|
||||||
|
@ -410,10 +424,10 @@ module Hbase
|
||||||
assert_not_nil(res['x:b'])
|
assert_not_nil(res['x:b'])
|
||||||
end
|
end
|
||||||
|
|
||||||
define_test "get with a block should yield (column, value) pairs" do
|
define_test "get with a block should yield (formatted column, value) pairs" do
|
||||||
res = {}
|
res = {}
|
||||||
@test_table._get_internal('1') { |col, val| res[col] = val }
|
@test_table._get_internal('1') { |col, val| res[col] = val }
|
||||||
assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
|
assert_equal([ 'x:\x11', 'x:a', 'x:b' ], res.keys.sort)
|
||||||
end
|
end
|
||||||
|
|
||||||
define_test "get should support COLUMNS with value CONVERTER information" do
|
define_test "get should support COLUMNS with value CONVERTER information" do
|
||||||
|
@ -707,12 +721,14 @@ module Hbase
|
||||||
define_test "scan should support COLUMNS with value CONVERTER information" do
|
define_test "scan should support COLUMNS with value CONVERTER information" do
|
||||||
@test_table.put(1, "x:c", [1024].pack('N'))
|
@test_table.put(1, "x:c", [1024].pack('N'))
|
||||||
@test_table.put(1, "x:d", [98].pack('N'))
|
@test_table.put(1, "x:d", [98].pack('N'))
|
||||||
|
@test_table.put(1, "x:\x11", [712].pack('N'))
|
||||||
begin
|
begin
|
||||||
res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
|
res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt', "x:\x11:toInt"]
|
||||||
assert_not_nil(res)
|
assert_not_nil(res)
|
||||||
assert_kind_of(Hash, res)
|
assert_kind_of(Hash, res)
|
||||||
assert_not_nil(/value=1024/.match(res['1']['x:c']))
|
assert_match(/value=1024/, res['1']['x:c'])
|
||||||
assert_not_nil(/value=98/.match(res['1']['x:d']))
|
assert_match(/value=98/, res['1']['x:d'])
|
||||||
|
assert_match(/value=712/, res['1']['x:\x11'])
|
||||||
ensure
|
ensure
|
||||||
# clean up newly added columns for this test only.
|
# clean up newly added columns for this test only.
|
||||||
@test_table.deleteall(1, 'x:c')
|
@test_table.deleteall(1, 'x:c')
|
||||||
|
|
Loading…
Reference in New Issue