diff --git a/CHANGES.txt b/CHANGES.txt index 87b3b24e34f..f1e09036991 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -439,6 +439,8 @@ Release 0.21.0 - Unreleased HBASE-2313 Nit-pick about hbase-2279 shell fixup, if you do get with non-existant column family, throws lots of exceptions (Alexey Kovyrin via Stack) + HBASE-2331 [shell] count command needs a way to specify scan caching + (Alexey Kovyrin via Stack) NEW FEATURES HBASE-1961 HBase EC2 scripts diff --git a/core/src/main/ruby/hbase.rb b/core/src/main/ruby/hbase.rb index de9d0063e33..4ba2a98ee23 100644 --- a/core/src/main/ruby/hbase.rb +++ b/core/src/main/ruby/hbase.rb @@ -31,6 +31,8 @@ module HBaseConstants MAXLENGTH = "MAXLENGTH" CACHE_BLOCKS = "CACHE_BLOCKS" REPLICATION_SCOPE = "REPLICATION_SCOPE" + INTERVAL = 'INTERVAL' + CACHE = 'CACHE' # Load constants from hbase java API def self.promote_constants(constants) diff --git a/core/src/main/ruby/hbase/table.rb b/core/src/main/ruby/hbase/table.rb index 7e1c80851ed..51115bf7bac 100644 --- a/core/src/main/ruby/hbase/table.rb +++ b/core/src/main/ruby/hbase/table.rb @@ -63,11 +63,11 @@ module Hbase #---------------------------------------------------------------------------------------------- # Count rows in a table - def count(interval = 1000) + def count(interval = 1000, caching_rows = 10) # We can safely set scanner caching with the first key only filter scan = Scan.new scan.cache_blocks = false - scan.caching = 10 + scan.caching = caching_rows scan.setFilter(FirstKeyOnlyFilter.new) # Run the scanner diff --git a/core/src/main/ruby/shell/commands/count.rb b/core/src/main/ruby/shell/commands/count.rb index 43417761a8f..44b39c85444 100644 --- a/core/src/main/ruby/shell/commands/count.rb +++ b/core/src/main/ruby/shell/commands/count.rb @@ -6,17 +6,32 @@ module Shell Count the number of rows in a table. This operation may take a LONG time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a counting mapreduce job). Current count is shown every 1000 rows by - default. Count interval may be optionally specified. Examples: + default. Count interval may be optionally specified. Scan caching + is enabled on count scans by default. Default cache size is 10 rows. + If your rows are small in size, you may want to increase this + parameter. Examples: hbase> count 't1' - hbase> count 't1', 100000 + hbase> count 't1', INTERVAL => 100000 + hbase> count 't1', CACHE => 1000 + hbase> count 't1', INTERVAL => 10, CACHE => 1000 EOF end - def command(table, interval = 1000) + def command(table, params = {}) + # If the second parameter is an integer, then it is the old command syntax + params = { 'INTERVAL' => params } if params.kind_of?(Fixnum) + + # Merge params with defaults + params = { + 'INTERVAL' => 1000, + 'CACHE' => 10 + }.merge(params) + + # Call the counter method now = Time.now formatter.header - count = table(table).count(interval) do |cnt, row| + count = table(table).count(params['INTERVAL'].to_i, params['CACHE'].to_i) do |cnt, row| formatter.row([ "Current count: #{cnt}, row: #{row}" ]) end formatter.footer(now, count)