From 3039d610149aa9a633141bf41abcbd1923dcdff8 Mon Sep 17 00:00:00 2001 From: Kannan Muthukkaruppan Date: Thu, 17 Nov 2011 23:01:59 +0000 Subject: [PATCH] HBASE-4628 [jira] Enhance Table Create Presplit Functionality within the HBase Shell Summary: HBase shell can algorithmically split a new table at creation This change adds optional arguments to the HBase shell's create command to split a table into a specified number of regions using a specified splitting algorithm as defined by RegionSplitter. Currently, we allow the user to presplit in the HBase shell by explicitly listing the startkey of all the region shards that they want. Instead, we should provide the RegionSplitter functionality of choosing a split algorithm, followed by the number of splits that they want. Test Plan: Created tables with and without splits using the shell; also attempted to give incorrect arguments to shell create command. Reviewers: nspiegelberg, JIRA Reviewed By: nspiegelberg CC: nspiegelberg, lhofhansl Differential Revision: 429 git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1203413 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/hbase/util/RegionSplitter.java | 2 +- src/main/ruby/hbase.rb | 2 ++ src/main/ruby/hbase/admin.rb | 8 ++++++++ src/main/ruby/shell/commands/create.rb | 3 +++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java index 30f17ee2cc4..bd524e7ef4e 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java +++ b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java @@ -584,7 +584,7 @@ public class RegionSplitter { * @throws IOException if the specified SplitAlgorithm class couldn't be * instantiated */ - static SplitAlgorithm newSplitAlgoInstance(Configuration conf, + public static SplitAlgorithm newSplitAlgoInstance(Configuration conf, String splitClassName) throws IOException 
{ Class splitClass; diff --git a/src/main/ruby/hbase.rb b/src/main/ruby/hbase.rb index 4d27191aea9..3ef4a82254f 100644 --- a/src/main/ruby/hbase.rb +++ b/src/main/ruby/hbase.rb @@ -53,6 +53,8 @@ module HBaseConstants FILTER = 'FILTER' SPLITS = 'SPLITS' SPLITS_FILE = 'SPLITS_FILE' + SPLITALGO = 'SPLITALGO' + NUMREGIONS = 'NUMREGIONS' # Load constants from hbase java API def self.promote_constants(constants) diff --git a/src/main/ruby/hbase/admin.rb b/src/main/ruby/hbase/admin.rb index 33cd2088fdf..02f01e20b2a 100644 --- a/src/main/ruby/hbase/admin.rb +++ b/src/main/ruby/hbase/admin.rb @@ -30,6 +30,7 @@ module Hbase def initialize(configuration, formatter) @admin = org.apache.hadoop.hbase.client.HBaseAdmin.new(configuration) connection = @admin.getConnection() + @conf = configuration @zk_wrapper = connection.getZooKeeperWatcher() zk = @zk_wrapper.getRecoverableZooKeeper().getZooKeeper() @zk_main = org.apache.zookeeper.ZooKeeperMain.new(zk) @@ -198,6 +199,13 @@ module Hbase splits[idx] = split.to_java_bytes idx = idx + 1 end + elsif arg.kind_of?(Hash) and (arg.has_key?(NUMREGIONS) or arg.has_key?(SLITALGO)) + raise(ArgumentError, "Number of regions must be specified") unless arg.has_key?(NUMREGIONS) + raise(ArgumentError, "Split algorithm must be specified") unless arg.has_key?(SPLITALGO) + raise(ArgumentError, "Number of regions must be geter than 1") unless arg[NUMREGIONS] > 1 + num_regions = arg[NUMREGIONS] + split_algo = org.apache.hadoop.hbase.util.RegionSplitter.newSplitAlgoInstance(@conf, arg[SPLITALGO]) + splits = split_algo.split(JInteger.valueOf(num_regions)) else # Add column to the table descriptor = hcd(arg, htd) diff --git a/src/main/ruby/shell/commands/create.rb b/src/main/ruby/shell/commands/create.rb index e67e8c1a180..14c1b0ff4f6 100644 --- a/src/main/ruby/shell/commands/create.rb +++ b/src/main/ruby/shell/commands/create.rb @@ -35,6 +35,9 @@ Examples: hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true} hbase> create 
't1', 'f1', {SPLITS => ['10', '20', '30', '40']} hbase> create 't1', 'f1', {SPLITS_FILE => 'splits.txt'} + hbase> # Optionally pre-split the table into NUMREGIONS, using + hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname) + hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'} EOF end