HBASE-25278 Add CACHE_BLOCKS option to count shell command
Expose a `CACHE_BLOCKS` argument on the `count` command; its value is passed to the `setCacheBlocks` method of the Scan that the count command uses. This is a quick-and-dirty way to read all of the blocks for a table into the block cache.
* Raise an error when the value is neither a boolean nor one of the expected strings ('true' or 'false').
Closes #2650
Signed-off-by: Zach York <zyork@apache.org>
Signed-off-by: Peter Somogyi <psomogyi@apache.org>
This commit is contained in:
parent
390abb520c
commit
1c09f24b2d
|
@ -303,18 +303,18 @@ EOF
|
||||||
|
|
||||||
#----------------------------------------------------------------------------------------------
|
#----------------------------------------------------------------------------------------------
|
||||||
# Count rows in a table
|
# Count rows in a table
|
||||||
def _count_internal(interval = 1000, scan = nil)
|
def _count_internal(interval = 1000, scan = nil, cacheBlocks=false)
|
||||||
raise(ArgumentError, 'Scan argument should be org.apache.hadoop.hbase.client.Scan') \
|
raise(ArgumentError, 'Scan argument should be org.apache.hadoop.hbase.client.Scan') \
|
||||||
unless scan.nil? || scan.is_a?(org.apache.hadoop.hbase.client.Scan)
|
unless scan.nil? || scan.is_a?(org.apache.hadoop.hbase.client.Scan)
|
||||||
# We can safely set scanner caching with the first key only filter
|
# We can safely set scanner caching with the first key only filter
|
||||||
|
|
||||||
if scan.nil?
|
if scan.nil?
|
||||||
scan = org.apache.hadoop.hbase.client.Scan.new
|
scan = org.apache.hadoop.hbase.client.Scan.new
|
||||||
scan.setCacheBlocks(false)
|
scan.setCacheBlocks(cacheBlocks)
|
||||||
scan.setCaching(10)
|
scan.setCaching(10)
|
||||||
scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
|
scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
|
||||||
else
|
else
|
||||||
scan.setCacheBlocks(false)
|
scan.setCacheBlocks(cacheBlocks)
|
||||||
filter = scan.getFilter
|
filter = scan.getFilter
|
||||||
firstKeyOnlyFilter = org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new
|
firstKeyOnlyFilter = org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new
|
||||||
if filter.nil?
|
if filter.nil?
|
||||||
|
|
|
@ -49,6 +49,17 @@ t to table 't1', the corresponding commands would be:
|
||||||
hbase> t.count FILTER => "
|
hbase> t.count FILTER => "
|
||||||
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"
|
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"
|
||||||
hbase> t.count COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz'
|
hbase> t.count COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz'
|
||||||
|
|
||||||
|
By default, this operation does not cause any new blocks to be read into
|
||||||
|
the RegionServer block cache. This is typically the desired action; however,
|
||||||
|
if you want to force all blocks for a table to be loaded into the block cache
|
||||||
|
on-demand, you can pass the 'CACHE_BLOCKS' option with a value of 'true'. A value
|
||||||
|
of 'false' is the default and will result in no blocks being cached. This
|
||||||
|
command can be used in conjunction with all other options.
|
||||||
|
|
||||||
|
hbase> count 'ns1:t1', CACHE_BLOCKS => true
|
||||||
|
hbase> count 'ns1:t1', CACHE_BLOCKS => 'true'
|
||||||
|
hbase> count 'ns1:t1', INTERVAL => 100000, CACHE_BLOCKS => false
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -60,17 +71,29 @@ EOF
|
||||||
# If the second parameter is an integer, then it is the old command syntax
|
# If the second parameter is an integer, then it is the old command syntax
|
||||||
params = { 'INTERVAL' => params } if params.is_a?(Integer)
|
params = { 'INTERVAL' => params } if params.is_a?(Integer)
|
||||||
|
|
||||||
|
# Try to be nice and convert a string to a bool
|
||||||
|
if params.include?('CACHE_BLOCKS') and params['CACHE_BLOCKS'].is_a?(String)
|
||||||
|
if params['CACHE_BLOCKS'].downcase == 'true'
|
||||||
|
params['CACHE_BLOCKS'] = true
|
||||||
|
elsif params['CACHE_BLOCKS'].downcase == 'false'
|
||||||
|
params['CACHE_BLOCKS'] = false
|
||||||
|
else
|
||||||
|
raise(ArgumentError, "Expected CACHE_BLOCKS value to be a boolean or the string 'true' or 'false'")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Merge params with defaults
|
# Merge params with defaults
|
||||||
params = {
|
params = {
|
||||||
'INTERVAL' => 1000,
|
'INTERVAL' => 1000,
|
||||||
'CACHE' => 10
|
'CACHE' => 10,
|
||||||
|
'CACHE_BLOCKS' => false
|
||||||
}.merge(params)
|
}.merge(params)
|
||||||
|
|
||||||
scan = table._hash_to_scan(params)
|
scan = table._hash_to_scan(params)
|
||||||
# Call the counter method
|
# Call the counter method
|
||||||
@start_time = Time.now
|
@start_time = Time.now
|
||||||
formatter.header
|
formatter.header
|
||||||
count = table._count_internal(params['INTERVAL'].to_i, scan) do |cnt, row|
|
count = table._count_internal(params['INTERVAL'].to_i, scan, params['CACHE_BLOCKS']) do |cnt, row|
|
||||||
formatter.row(["Current count: #{cnt}, row: #{row}"])
|
formatter.row(["Current count: #{cnt}, row: #{row}"])
|
||||||
end
|
end
|
||||||
formatter.footer(count)
|
formatter.footer(count)
|
||||||
|
|
Loading…
Reference in New Issue