HBASE-18001 Extend the "count" shell command to support specified conditions

Signed-off-by: Chia-Ping Tsai <chia7712@gmail.com>
This commit is contained in:
Guangxu Cheng 2017-05-17 22:51:47 +08:00 committed by Chia-Ping Tsai
parent 998bd5f90e
commit 64c701768b
3 changed files with 54 additions and 6 deletions

View File

@ -297,12 +297,28 @@ EOF
#----------------------------------------------------------------------------------------------
# Count rows in a table
def _count_internal(interval = 1000, caching_rows = 10)
def _count_internal(interval = 1000, scan = nil)
raise(ArgumentError, "Scan argument should be org.apache.hadoop.hbase.client.Scan") \
unless scan == nil || scan.kind_of?(org.apache.hadoop.hbase.client.Scan)
# We can safely set scanner caching with the first key only filter
scan = org.apache.hadoop.hbase.client.Scan.new
scan.setCacheBlocks(false)
scan.setCaching(caching_rows)
scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
if scan == nil
scan = org.apache.hadoop.hbase.client.Scan.new
scan.setCacheBlocks(false)
scan.setCaching(10)
scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
else
scan.setCacheBlocks(false)
filter = scan.getFilter()
firstKeyOnlyFilter = org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new
if filter == nil
scan.setFilter(firstKeyOnlyFilter)
else
firstKeyOnlyFilter.setReversed(filter.isReversed())
scan.setFilter(org.apache.hadoop.hbase.filter.FilterList.new(filter, firstKeyOnlyFilter))
end
end
# Run the scanner
scanner = @table.getScanner(scan)

View File

@ -35,6 +35,9 @@ parameter. Examples:
hbase> count 't1', INTERVAL => 100000
hbase> count 't1', CACHE => 1000
hbase> count 't1', INTERVAL => 10, CACHE => 1000
hbase> count 't1', FILTER => "
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"
hbase> count 't1', COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz'
The same commands also can be run on a table reference. Suppose you had a reference
t to table 't1', the corresponding commands would be:
@ -43,6 +46,9 @@ t to table 't1', the corresponding commands would be:
hbase> t.count INTERVAL => 100000
hbase> t.count CACHE => 1000
hbase> t.count INTERVAL => 10, CACHE => 1000
hbase> t.count FILTER => "
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"
hbase> t.count COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz'
EOF
end
@ -60,10 +66,11 @@ EOF
'CACHE' => 10
}.merge(params)
scan = table._hash_to_scan(params)
# Call the counter method
@start_time = Time.now
formatter.header
count = table._count_internal(params['INTERVAL'].to_i, params['CACHE'].to_i) do |cnt, row|
count = table._count_internal(params['INTERVAL'].to_i, scan) do |cnt, row|
formatter.row([ "Current count: #{cnt}, row: #{row}" ])
end
formatter.footer(count)

View File

@ -250,6 +250,31 @@ module Hbase
assert(!rows.empty?)
end
# Counting from a start row of '4' is expected to match nothing.
# NOTE(review): presumably every fixture row key sorts before '4' — confirm against the test setup.
define_test "count should support STARTROW parameter" do
  row_count = @test_table.count(STARTROW => '4')
  # assert_equal reports expected vs. actual on failure, unlike assert(a == b).
  assert_equal(0, row_count)
end
# Counting up to a stop row of '0' is expected to match nothing.
# NOTE(review): presumably no fixture row key sorts before '0' — confirm against the test setup.
define_test "count should support STOPROW parameter" do
  row_count = @test_table.count(STOPROW => '0')
  # assert_equal reports expected vs. actual on failure, unlike assert(a == b).
  assert_equal(0, row_count)
end
# Writes a cell into column 'x:c' of row 4, then counts with COLUMNS restricted
# to 'x:c' and expects exactly one row.
# NOTE(review): assumes no other fixture row has a value in 'x:c' — confirm against the test setup.
define_test "count should support COLUMNS parameter" do
  @test_table.put(4, "x:c", "31")
  begin
    row_count = @test_table.count(COLUMNS => ['x:c'])
    # assert_equal reports expected vs. actual on failure, unlike assert(a == b).
    assert_equal(1, row_count)
  ensure
    # Remove the temporary cell even if the assertion fails, so it is not left
    # behind in the shared test table.
    @test_table.delete(4, "x:c")
  end
end
# Counts with a ValueFilter string that matches the binary value '11' and
# expects exactly one row.
# NOTE(review): assumes exactly one fixture row holds the value '11' — confirm against the test setup.
define_test "count should support FILTER parameter" do
  row_count = @test_table.count(FILTER => "ValueFilter(=, 'binary:11')")
  # assert_equal reports expected vs. actual on failure, unlike assert(a == b).
  assert_equal(1, row_count)
end
#-------------------------------------------------------------------------------
define_test "get should work w/o columns specification" do