From c7ad42867dbf35c5d07e4e3085edc33332fd2145 Mon Sep 17 00:00:00 2001
From: tedyu
Date: Tue, 29 Mar 2016 10:29:56 -0700
Subject: [PATCH] HBASE-15191 CopyTable and VerifyReplication - Option to specify batch size, versions (Parth Shah)

---
 .../hadoop/hbase/mapreduce/CopyTable.java       | 27 ++++++++++++++++---
 .../replication/VerifyReplication.java          |  9 +++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
index 8d930d1d0d1..243e2430413 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -57,7 +57,9 @@ public class CopyTable extends Configured implements Tool {
 
   final static String NAME = "copytable";
   long startTime = 0;
-  long endTime = 0;
+  long endTime = HConstants.LATEST_TIMESTAMP;
+  int batch = Integer.MAX_VALUE;
+  int cacheRow = -1;
   int versions = -1;
   String tableName = null;
   String startRow = null;
@@ -93,11 +95,16 @@ public class CopyTable extends Configured implements Tool {
 
     job.setJarByClass(CopyTable.class);
     Scan scan = new Scan();
+    scan.setBatch(batch);
     scan.setCacheBlocks(false);
-    if (startTime != 0) {
-      scan.setTimeRange(startTime,
-          endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
+
+    if (cacheRow > 0) {
+      scan.setCaching(cacheRow);
+    } else {
+      scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
     }
+
+    scan.setTimeRange(startTime, endTime);
     if (allCells) {
       scan.setRaw(true);
     }
@@ -257,6 +264,18 @@ public class CopyTable extends Configured implements Tool {
         endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
         continue;
       }
+
+      final String batchArgKey = "--batch=";
+      if (cmd.startsWith(batchArgKey)) {
+        batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
+        continue;
+      }
+
+      final String cacheRowArgKey = "--cacheRow=";
+      if (cmd.startsWith(cacheRowArgKey)) {
+        cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));
+        continue;
+      }
 
       final String versionsArgKey = "--versions=";
       if (cmd.startsWith(versionsArgKey)) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
index 75dfe9e81b9..a19cea5c5a3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
@@ -71,6 +71,7 @@ public class VerifyReplication extends Configured implements Tool {
   private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
   static long startTime = 0;
   static long endTime = Long.MAX_VALUE;
+  static int batch = Integer.MAX_VALUE;
   static int versions = -1;
   static String tableName = null;
   static String families = null;
@@ -104,6 +105,8 @@ public class VerifyReplication extends Configured implements Tool {
       if (replicatedScanner == null) {
         Configuration conf = context.getConfiguration();
         final Scan scan = new Scan();
+        scan.setBatch(batch);
+        scan.setCacheBlocks(false);
         scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
         long startTime = conf.getLong(NAME + ".startTime", 0);
         long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
@@ -330,6 +333,12 @@ public class VerifyReplication extends Configured implements Tool {
         versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
         continue;
       }
+
+      final String batchArgKey = "--batch=";
+      if (cmd.startsWith(batchArgKey)) {
+        batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
+        continue;
+      }
 
       final String familiesArgKey = "--families=";
       if (cmd.startsWith(familiesArgKey)) {
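
Usage sketch (not part of the patch): with this change applied, the new --batch= option (and, for CopyTable, --cacheRow=) could be passed on the command line next to the existing flags. --batch limits how many cells a single Scan.next() call returns, while --cacheRow sets scanner caching (rows fetched per RPC). Table names and the peer id below are placeholders.

  # CopyTable: cap each scanner batch at 100 cells, fetch 500 rows per scanner call
  hbase org.apache.hadoop.hbase.mapreduce.CopyTable --batch=100 --cacheRow=500 --new.name=backupTable sourceTable

  # VerifyReplication: apply the same cell batching while comparing sourceTable against peer "1"
  hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication --batch=100 1 sourceTable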