HBASE-2331 [shell] count command needs a way to specify scan caching

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@923979 13f79535-47bb-0310-9956-ffa450edef68
2010-03-16 21:04:29 +00:00 · 2010-03-16 21:04:29 +00:00 · 0358a0bf57
parent dfc23200bd
commit 0358a0bf57
4 changed files with 25 additions and 6 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -439,6 +439,8 @@ Release 0.21.0 - Unreleased
   HBASE-2313  Nit-pick about hbase-2279 shell fixup, if you do get with
               non-existent column family, throws lots of exceptions
               (Alexey Kovyrin via Stack)
   HBASE-2331  [shell] count command needs a way to specify scan caching
               (Alexey Kovyrin via Stack)
  NEW FEATURES
   HBASE-1961  HBase EC2 scripts
--- a/core/src/main/ruby/hbase.rb
+++ b/core/src/main/ruby/hbase.rb
@ -31,6 +31,8 @@ module HBaseConstants
  MAXLENGTH = "MAXLENGTH"
  CACHE_BLOCKS = "CACHE_BLOCKS"
  REPLICATION_SCOPE = "REPLICATION_SCOPE"
  INTERVAL = 'INTERVAL'
  CACHE = 'CACHE'
  # Load constants from hbase java API
  def self.promote_constants(constants)
--- a/core/src/main/ruby/hbase/table.rb
+++ b/core/src/main/ruby/hbase/table.rb
@ -63,11 +63,11 @@ module Hbase
    #----------------------------------------------------------------------------------------------
    # Count rows in a table
-    def count(interval = 1000)
+    def count(interval = 1000, caching_rows = 10)
      # We can safely set scanner caching with the first key only filter
      scan = Scan.new
      scan.cache_blocks = false
-      scan.caching = 10
+      scan.caching = caching_rows
      scan.setFilter(FirstKeyOnlyFilter.new)
      # Run the scanner
--- a/core/src/main/ruby/shell/commands/count.rb
+++ b/core/src/main/ruby/shell/commands/count.rb
@ -6,17 +6,32 @@ module Shell
          Count the number of rows in a table. This operation may take a LONG
          time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a
          counting mapreduce job). Current count is shown every 1000 rows by
-          default. Count interval may be optionally specified. Examples:
+          default. Count interval may be optionally specified. Scan caching
          is enabled on count scans by default. Default cache size is 10 rows.
          If your rows are small in size, you may want to increase this
          parameter. Examples:
          hbase> count 't1'
-          hbase> count 't1', 100000
+          hbase> count 't1', INTERVAL => 100000
          hbase> count 't1', CACHE => 1000
          hbase> count 't1', INTERVAL => 10, CACHE => 1000
        EOF
      end
-      def command(table, interval = 1000)
+      def command(table, params = {})
        # If the second parameter is an integer, then it is the old command syntax
        params = { 'INTERVAL' => params } if params.kind_of?(Fixnum)
        # Merge params with defaults
        params = {
          'INTERVAL' => 1000,
          'CACHE' => 10
        }.merge(params)
        # Call the counter method
        now = Time.now
        formatter.header
-        count = table(table).count(interval) do |cnt, row|
+        count = table(table).count(params['INTERVAL'].to_i, params['CACHE'].to_i) do |cnt, row|
          formatter.row([ "Current count: #{cnt}, row: #{row}" ])
        end
        formatter.footer(now, count)