diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 1825ae40b83..30cdb999b2f 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -449,18 +449,23 @@ EOF # Print out results. Result can be Cell or RowResult. res = {} result.listCells.each do |c| - family = convert_bytes_with_position(c.getFamilyArray, - c.getFamilyOffset, c.getFamilyLength, converter_class, converter) - qualifier = convert_bytes_with_position(c.getQualifierArray, - c.getQualifierOffset, c.getQualifierLength, converter_class, converter) + # Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that + # column is constructed in this consistent way so that it can be used as a key. + family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength) + qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength) + column = "#{family_bytes}:#{qualifier_bytes}" - column = "#{family}:#{qualifier}" value = to_string(column, c, maxlength, converter_class, converter) + # Use the FORMATTER to determine how column is printed + family = convert_bytes(family_bytes, converter_class, converter) + qualifier = convert_bytes(qualifier_bytes, converter_class, converter) + formatted_column = "#{family}:#{qualifier}" + if block_given? - yield(column, value) + yield(formatted_column, value) else - res[column] = value + res[formatted_column] = value end end @@ -604,19 +609,24 @@ EOF is_stale |= row.isStale row.listCells.each do |c| - family = convert_bytes_with_position(c.getFamilyArray, - c.getFamilyOffset, c.getFamilyLength, converter_class, converter) - qualifier = convert_bytes_with_position(c.getQualifierArray, - c.getQualifierOffset, c.getQualifierLength, converter_class, converter) + # Get the family and qualifier of the cell without escaping non-printable characters. 
It is crucial that + # column is constructed in this consistent way so that it can be used as a key. + family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength) + qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength) + column = "#{family_bytes}:#{qualifier_bytes}" - column = "#{family}:#{qualifier}" cell = to_string(column, c, maxlength, converter_class, converter) + # Use the FORMATTER to determine how column is printed + family = convert_bytes(family_bytes, converter_class, converter) + qualifier = convert_bytes(qualifier_bytes, converter_class, converter) + formatted_column = "#{family}:#{qualifier}" + if block_given? - yield(key, "column=#{column}, #{cell}") + yield(key, "column=#{formatted_column}, #{cell}") else res[key] ||= {} - res[key][column] = cell + res[key][formatted_column] = cell end end # One more row processed @@ -729,11 +739,15 @@ EOF org.apache.hadoop.hbase.TableName::META_TABLE_NAME.equals(@table.getName) end - # Returns family and (when has it) qualifier for a column name + # Given a column specification in the format FAMILY[:QUALIFIER[:CONVERTER]] + # 1. Save the converter for the given column, if one is provided + # 2. Return a 2-element Array with [family, qualifier or nil], discarding the converter if provided + # + # @param [String] column specification def parse_column_name(column) - split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes) - set_converter(split) if split.length > 1 - [split[0], split.length > 1 ? split[1] : nil] + spec = parse_column_format_spec(column) + set_column_converter(spec.family, spec.qualifier, spec.converter) unless spec.converter.nil? 
+ [spec.family, spec.qualifier] end def toISO8601(millis) @@ -806,9 +820,46 @@ EOF eval(converter_class).method(converter_method).call(bytes, offset, len) end + # Store the information designating what part of a column should be printed, and how + ColumnFormatSpec = Struct.new(:family, :qualifier, :converter) + + ## + # Parse the column specification for formatting used by shell commands like :scan + # + # Strings should be structured as follows: + # FAMILY[:QUALIFIER[:CONVERTER]] + # Where: + # - FAMILY is the column family + # - QUALIFIER is optional and is the column qualifier. Non-printable characters should be left AS-IS and should NOT BE escaped. + # - CONVERTER is optional and is the name of a converter (like toLong) to apply + # + # @param [String] column + # @return [ColumnFormatSpec] family, qualifier, and converter as Java bytes; qualifier and converter are nil when absent + private def parse_column_format_spec(column) + split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes) + family = split[0] + qualifier = nil + converter = nil + if split.length > 1 + parts = org.apache.hadoop.hbase.CellUtil.parseColumn(split[1]) + qualifier = parts[0] + if parts.length > 1 + converter = parts[1] + end + end + + ColumnFormatSpec.new(family, qualifier, converter) + end + + private def set_column_converter(family, qualifier, converter) + @converters["#{String.from_java_bytes(family)}:#{String.from_java_bytes(qualifier)}"] = String.from_java_bytes(converter) + end + # if the column spec contains CONVERTER information, to get rid of :CONVERTER info from column pair. # 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only # 2. 
register the CONVERTER information based on column spec - "cf:qualifier" + # + # Deprecated for removal in 4.0.0; parse_column_name now goes through parse_column_format_spec and set_column_converter def set_converter(column) family = String.from_java_bytes(column[0]) parts = org.apache.hadoop.hbase.CellUtil.parseColumn(column[1]) @@ -817,6 +868,8 @@ EOF column[1] = parts[0] end end + extend Gem::Deprecate + deprecate :set_converter, "4.0.0", nil, nil # NOTE(review): Gem::Deprecate.deprecate takes (name, repl, year, month), so "4.0.0" is read as the replacement name and year/month are nil - verify the generated warning (its date formatting may reject nil) #---------------------------------------------------------------------------------------------- # Get the split points for the table diff --git a/hbase-shell/src/test/ruby/hbase/table_test.rb b/hbase-shell/src/test/ruby/hbase/table_test.rb index 20bcb500a80..ee6f0f58967 100644 --- a/hbase-shell/src/test/ruby/hbase/table_test.rb +++ b/hbase-shell/src/test/ruby/hbase/table_test.rb @@ -239,6 +239,7 @@ module Hbase @test_ts = 12345678 @test_table.put(1, "x:a", 1) @test_table.put(1, "x:b", 2, @test_ts) + @test_table.put(1, "x:\x11", [921].pack("N")) @test_table.put(2, "x:a", 11) @test_table.put(2, "x:b", 12, @test_ts) @@ -333,9 +334,10 @@ module Hbase end define_test "get should work with hash columns spec and an array of strings COLUMN parameter" do - res = @test_table._get_internal('1', COLUMN => [ 'x:a', 'x:b' ]) + res = @test_table._get_internal('1', COLUMN => [ "x:\x11", 'x:a', 'x:b' ]) assert_not_nil(res) assert_kind_of(Hash, res) + assert_not_nil(res['x:\x11']) assert_not_nil(res['x:a']) assert_not_nil(res['x:b']) end @@ -356,6 +358,18 @@ module Hbase assert_not_nil(res['x:b']) end + define_test "get should work with non-printable columns and values" do + res = @test_table._get_internal('1', COLUMNS => [ "x:\x11" ]) + assert_not_nil(res) + assert_kind_of(Hash, res) + assert_match(/value=\\x00\\x00\\x03\\x99/, res[ 'x:\x11' ]) + + res = @test_table._get_internal('1', COLUMNS => [ "x:\x11:toInt" ]) + assert_not_nil(res) + assert_kind_of(Hash, res) + assert_match(/value=921/, res[ 'x:\x11' ]) + end + define_test "get should work with hash columns spec and TIMESTAMP only" do res = 
@test_table._get_internal('1', TIMESTAMP => @test_ts) assert_not_nil(res) @@ -412,10 +426,10 @@ module Hbase assert_not_nil(res['x:b']) end - define_test "get with a block should yield (column, value) pairs" do + define_test "get with a block should yield (formatted column, value) pairs" do res = {} @test_table._get_internal('1') { |col, val| res[col] = val } - assert_equal(res.keys.sort, [ 'x:a', 'x:b' ]) + assert_equal([ 'x:\x11', 'x:a', 'x:b' ], res.keys.sort) end define_test "get should support COLUMNS with value CONVERTER information" do @@ -709,12 +723,14 @@ module Hbase define_test "scan should support COLUMNS with value CONVERTER information" do @test_table.put(1, "x:c", [1024].pack('N')) @test_table.put(1, "x:d", [98].pack('N')) + @test_table.put(1, "x:\x11", [712].pack('N')) begin - res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt'] + res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt', "x:\x11:toInt"] assert_not_nil(res) assert_kind_of(Hash, res) - assert_not_nil(/value=1024/.match(res['1']['x:c'])) - assert_not_nil(/value=98/.match(res['1']['x:d'])) + assert_match(/value=1024/, res['1']['x:c']) + assert_match(/value=98/, res['1']['x:d']) + assert_match(/value=712/, res['1']['x:\x11']) ensure # clean up newly added columns for this test only. @test_table.deleteall(1, 'x:c')